{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compare tfidf vectors\n",
    "\n",
    "As a baseline measurement of textual distance, we should try good old cosine distance on tfidf vectors."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import csv\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import math, sys\n",
    "from collections import Counter\n",
    "from scipy import spatial\n",
    "from matplotlib import pyplot as plt\n",
    "from scipy.stats import pearsonr\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Most common 10,000 words by document frequency\n",
    "\n",
    "I'm restricting this mostly to real English words, plus a few ringers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "realwords = set()\n",
    "\n",
    "with open('/Users/tunder/Dropbox/DataMunging/rulesets/MainDictionary.txt', encoding = 'utf-8') as f:\n",
    "    for line in f:\n",
    "        fields = line.strip().split('\\t')\n",
    "        realwords.add(fields[0])\n",
    "\n",
    "special_allowed = {'#arabicnumeral', '#personalname', '#placename', \n",
    "                   '#monthoftheyear', '#romannumeral', '#dayoftheweek',\n",
    "                  'bullshit', \"n't\", \"'ll\", \"'ve\", \"'d\", \"'re\", \"'em\",\n",
    "                  'ing', 'a.m.', 'p.m.'}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "docfreq = Counter()\n",
    "\n",
    "with open('../parsejsons/rawcounts.tsv', encoding = 'utf-8') as f:\n",
    "    docset = set()\n",
    "    wordset = set()\n",
    "    \n",
    "    for line in f:\n",
    "        row = line.strip().split('\\t')\n",
    "        doc = row[0]\n",
    "        if doc == 'docid':\n",
    "            continue\n",
    "        if doc not in docset:\n",
    "            docset.add(doc)\n",
    "            wordset = set()\n",
    "            \n",
    "        word = row[1]\n",
    "        if word not in realwords and len(word) > 1 and word not in special_allowed:\n",
    "            stripped_word = word.strip(\".,—'-\\\"\")\n",
    "            if stripped_word not in realwords:\n",
    "                continue\n",
    "            else:\n",
    "                word = stripped_word\n",
    "                \n",
    "        if word not in wordset:        \n",
    "            docfreq[word] += 1\n",
    "            wordset.add(word)\n",
    "\n",
    "common = docfreq.most_common(10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "6846"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(docset)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Turn the list of common words into two handy variables\n",
    "\n",
    "1. A **lexindex** that maps words to positions in a vector.\n",
    "\n",
    "2. An **inverse document frequency vector** that has idf for each word."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "lexindex = dict()\n",
    "idfvec = np.zeros(10000)\n",
    "\n",
    "for idx, atuple in enumerate(common):\n",
    "    word, count = atuple\n",
    "    lexindex[word] = idx\n",
    "    idf = math.log(6847 /count)\n",
    "    idfvec[idx] = idf"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Create document vectors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "docvec = dict()\n",
    "\n",
    "with open('../parsejsons/rawcounts.tsv', encoding = 'utf-8') as f:\n",
    "    docset = set()\n",
    "    wordset = set()\n",
    "    \n",
    "    for line in f:\n",
    "        row = line.strip().split('\\t')\n",
    "        doc = row[0]\n",
    "        if doc == 'docid':\n",
    "            continue\n",
    "        if doc not in docvec:\n",
    "            docvec[doc] = np.zeros(10000)\n",
    "            \n",
    "        word = row[1]\n",
    "        if word not in lexindex:\n",
    "            stripped_word = word.strip(\".,—'-\\\"\")\n",
    "            if stripped_word not in lexindex:\n",
    "                continue\n",
    "            else:\n",
    "                word = stripped_word\n",
    "        \n",
    "        idx = lexindex[word]\n",
    "        count = int(row[2])\n",
    "        docvec[doc][idx] = docvec[doc][idx] + count"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Now, to create genre vectors"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Metadata by volume\n",
    "\n",
    "meta = pd.read_csv('../metadata/genremeta.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Lists of genre types\n",
    "\n",
    "genrenamedf = pd.read_csv('../metadata/selected_genres.tsv', sep = '\\t')\n",
    "primaries = genrenamedf.loc[genrenamedf.genretype == 'primary', 'genre'].tolist()\n",
    "bgenres = genrenamedf.loc[genrenamedf.genretype == 'B genre', 'genre'].tolist()\n",
    "intersection_genres = set(genrenamedf.loc[genrenamedf.genretype == 'intersection', 'genre'].tolist())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We're going to create a tdf-idf vector in a dictionary for all the primary genres, and also the b genres, which will be used when we need a self-comparison for a primary genre."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "def has_genre(row, genre):\n",
    "    g = row.tags\n",
    "    tags = g.split('|')\n",
    "    if genre in tags:\n",
    "        return row.docid\n",
    "    else:\n",
    "        return None\n",
    "    \n",
    "def sum_genre(meta, genre, docvec):\n",
    "    global missing\n",
    "    \n",
    "    genredocs = meta.apply(has_genre, args = ([genre]), axis = 1)\n",
    "    genredocs = set(genredocs)\n",
    "    genredocs.remove(None)\n",
    "    vector = np.zeros(10000)\n",
    "    for g in genredocs:\n",
    "        if g in docvec:\n",
    "            vector = vector + docvec[g]\n",
    "        else:\n",
    "            print('missing volume: ' + g)\n",
    "            missing.add(g)\n",
    "    return vector\n",
    "    \n",
    "def tfidf(genre, meta, docvec, idfvec):\n",
    "    vector = sum_genre(meta, genre, docvec)\n",
    "    genre_tfidf = vector * idfvec\n",
    "    return genre_tfidf\n",
    "        \n",
    "tfidf_dict = dict()\n",
    "for g in primaries:\n",
    "    tfidf_dict[g] = tfidf(g, meta, docvec, idfvec)\n",
    "for g in bgenres:\n",
    "    tfidf_dict[g] = tfidf(g, meta, docvec, idfvec)\n",
    "    "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Calculate cosine distances\n",
    "\n",
    "in a dictionary indexed by genre. The two levels of indexing correspond to horizontal and vertical columns in a square matrix."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "cosdist = dict()\n",
    "\n",
    "for g1 in primaries:\n",
    "    cosdist[g1] = dict()\n",
    "    \n",
    "    for g2 in primaries:\n",
    "        if g1 == g2:\n",
    "            bversion = g1 + ' B'\n",
    "            if bversion in bgenres:\n",
    "                cos = spatial.distance.cosine(tfidf_dict[g1], tfidf_dict[bversion])\n",
    "                cosdist[g1][g2] = cos\n",
    "            else:\n",
    "                cosdist[g1][g2] = float('nan')\n",
    "        else:                  \n",
    "            cos = spatial.distance.cosine(tfidf_dict[g1], tfidf_dict[g2])\n",
    "            cosdist[g1][g2] = cos"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To actually see this as a square matrix, turn it into a DataFrame. Note that the matrix has NaNs for self-comparisons in cases where we didn't have a b genre (not enough volumes to create two 100-vol non-overlapping sets)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Adventure</th>\n",
       "      <th>Bildungsroman</th>\n",
       "      <th>Biographical</th>\n",
       "      <th>Christian</th>\n",
       "      <th>Domestic</th>\n",
       "      <th>Fantasy</th>\n",
       "      <th>Historical</th>\n",
       "      <th>Horror</th>\n",
       "      <th>Humor</th>\n",
       "      <th>Juvenile</th>\n",
       "      <th>...</th>\n",
       "      <th>Subj: Man-woman</th>\n",
       "      <th>Subj: SF, American</th>\n",
       "      <th>Subj: SF, Other</th>\n",
       "      <th>Subj: Short stories, American</th>\n",
       "      <th>Subj: Short stories, Other</th>\n",
       "      <th>Suspense</th>\n",
       "      <th>War</th>\n",
       "      <th>Western</th>\n",
       "      <th>randomA</th>\n",
       "      <th>randomB</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>Adventure</th>\n",
       "      <td>NaN</td>\n",
       "      <td>0.215406</td>\n",
       "      <td>0.199636</td>\n",
       "      <td>0.260327</td>\n",
       "      <td>0.206770</td>\n",
       "      <td>0.190834</td>\n",
       "      <td>0.170435</td>\n",
       "      <td>0.138244</td>\n",
       "      <td>0.148603</td>\n",
       "      <td>0.383608</td>\n",
       "      <td>...</td>\n",
       "      <td>0.178036</td>\n",
       "      <td>0.179597</td>\n",
       "      <td>0.195417</td>\n",
       "      <td>0.140443</td>\n",
       "      <td>0.206562</td>\n",
       "      <td>0.131355</td>\n",
       "      <td>0.259147</td>\n",
       "      <td>0.286835</td>\n",
       "      <td>0.157745</td>\n",
       "      <td>0.183035</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Bildungsroman</th>\n",
       "      <td>0.215406</td>\n",
       "      <td>0.089303</td>\n",
       "      <td>0.220546</td>\n",
       "      <td>0.249257</td>\n",
       "      <td>0.110307</td>\n",
       "      <td>0.282142</td>\n",
       "      <td>0.214896</td>\n",
       "      <td>0.177737</td>\n",
       "      <td>0.136661</td>\n",
       "      <td>0.446081</td>\n",
       "      <td>...</td>\n",
       "      <td>0.156228</td>\n",
       "      <td>0.295572</td>\n",
       "      <td>0.318653</td>\n",
       "      <td>0.123164</td>\n",
       "      <td>0.171966</td>\n",
       "      <td>0.224473</td>\n",
       "      <td>0.418203</td>\n",
       "      <td>0.344071</td>\n",
       "      <td>0.171014</td>\n",
       "      <td>0.192145</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Biographical</th>\n",
       "      <td>0.199636</td>\n",
       "      <td>0.220546</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.235848</td>\n",
       "      <td>0.271429</td>\n",
       "      <td>0.166882</td>\n",
       "      <td>0.086023</td>\n",
       "      <td>0.212154</td>\n",
       "      <td>0.257784</td>\n",
       "      <td>0.286990</td>\n",
       "      <td>...</td>\n",
       "      <td>0.258573</td>\n",
       "      <td>0.264854</td>\n",
       "      <td>0.279647</td>\n",
       "      <td>0.160999</td>\n",
       "      <td>0.154715</td>\n",
       "      <td>0.304745</td>\n",
       "      <td>0.303012</td>\n",
       "      <td>0.282809</td>\n",
       "      <td>0.110679</td>\n",
       "      <td>0.115196</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Christian</th>\n",
       "      <td>0.260327</td>\n",
       "      <td>0.249257</td>\n",
       "      <td>0.235848</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.251991</td>\n",
       "      <td>0.261536</td>\n",
       "      <td>0.233791</td>\n",
       "      <td>0.238892</td>\n",
       "      <td>0.237135</td>\n",
       "      <td>0.392191</td>\n",
       "      <td>...</td>\n",
       "      <td>0.271262</td>\n",
       "      <td>0.332135</td>\n",
       "      <td>0.361468</td>\n",
       "      <td>0.214116</td>\n",
       "      <td>0.245587</td>\n",
       "      <td>0.300240</td>\n",
       "      <td>0.432316</td>\n",
       "      <td>0.365158</td>\n",
       "      <td>0.193530</td>\n",
       "      <td>0.205755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Domestic</th>\n",
       "      <td>0.206770</td>\n",
       "      <td>0.110307</td>\n",
       "      <td>0.271429</td>\n",
       "      <td>0.251991</td>\n",
       "      <td>0.084297</td>\n",
       "      <td>0.288123</td>\n",
       "      <td>0.262960</td>\n",
       "      <td>0.161963</td>\n",
       "      <td>0.101590</td>\n",
       "      <td>0.525196</td>\n",
       "      <td>...</td>\n",
       "      <td>0.140270</td>\n",
       "      <td>0.307747</td>\n",
       "      <td>0.332503</td>\n",
       "      <td>0.144971</td>\n",
       "      <td>0.211775</td>\n",
       "      <td>0.174811</td>\n",
       "      <td>0.435761</td>\n",
       "      <td>0.347907</td>\n",
       "      <td>0.215681</td>\n",
       "      <td>0.244359</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Fantasy</th>\n",
       "      <td>0.190834</td>\n",
       "      <td>0.282142</td>\n",
       "      <td>0.166882</td>\n",
       "      <td>0.261536</td>\n",
       "      <td>0.288123</td>\n",
       "      <td>0.071596</td>\n",
       "      <td>0.166720</td>\n",
       "      <td>0.160623</td>\n",
       "      <td>0.266337</td>\n",
       "      <td>0.372555</td>\n",
       "      <td>...</td>\n",
       "      <td>0.274196</td>\n",
       "      <td>0.194761</td>\n",
       "      <td>0.217714</td>\n",
       "      <td>0.211413</td>\n",
       "      <td>0.219323</td>\n",
       "      <td>0.290014</td>\n",
       "      <td>0.369003</td>\n",
       "      <td>0.315618</td>\n",
       "      <td>0.182390</td>\n",
       "      <td>0.193357</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Historical</th>\n",
       "      <td>0.170435</td>\n",
       "      <td>0.214896</td>\n",
       "      <td>0.086023</td>\n",
       "      <td>0.233791</td>\n",
       "      <td>0.262960</td>\n",
       "      <td>0.166720</td>\n",
       "      <td>0.074212</td>\n",
       "      <td>0.214511</td>\n",
       "      <td>0.251978</td>\n",
       "      <td>0.284545</td>\n",
       "      <td>...</td>\n",
       "      <td>0.250362</td>\n",
       "      <td>0.255819</td>\n",
       "      <td>0.262631</td>\n",
       "      <td>0.147539</td>\n",
       "      <td>0.148783</td>\n",
       "      <td>0.284327</td>\n",
       "      <td>0.242472</td>\n",
       "      <td>0.243358</td>\n",
       "      <td>0.107812</td>\n",
       "      <td>0.113900</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Horror</th>\n",
       "      <td>0.138244</td>\n",
       "      <td>0.177737</td>\n",
       "      <td>0.212154</td>\n",
       "      <td>0.238892</td>\n",
       "      <td>0.161963</td>\n",
       "      <td>0.160623</td>\n",
       "      <td>0.214511</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.135872</td>\n",
       "      <td>0.436160</td>\n",
       "      <td>...</td>\n",
       "      <td>0.141327</td>\n",
       "      <td>0.175446</td>\n",
       "      <td>0.206549</td>\n",
       "      <td>0.137688</td>\n",
       "      <td>0.185575</td>\n",
       "      <td>0.135654</td>\n",
       "      <td>0.398639</td>\n",
       "      <td>0.329349</td>\n",
       "      <td>0.146107</td>\n",
       "      <td>0.168549</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Humor</th>\n",
       "      <td>0.148603</td>\n",
       "      <td>0.136661</td>\n",
       "      <td>0.257784</td>\n",
       "      <td>0.237135</td>\n",
       "      <td>0.101590</td>\n",
       "      <td>0.266337</td>\n",
       "      <td>0.251978</td>\n",
       "      <td>0.135872</td>\n",
       "      <td>0.082600</td>\n",
       "      <td>0.498768</td>\n",
       "      <td>...</td>\n",
       "      <td>0.108213</td>\n",
       "      <td>0.255216</td>\n",
       "      <td>0.283067</td>\n",
       "      <td>0.126966</td>\n",
       "      <td>0.210300</td>\n",
       "      <td>0.124569</td>\n",
       "      <td>0.410975</td>\n",
       "      <td>0.342050</td>\n",
       "      <td>0.191327</td>\n",
       "      <td>0.215768</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Juvenile</th>\n",
       "      <td>0.383608</td>\n",
       "      <td>0.446081</td>\n",
       "      <td>0.286990</td>\n",
       "      <td>0.392191</td>\n",
       "      <td>0.525196</td>\n",
       "      <td>0.372555</td>\n",
       "      <td>0.284545</td>\n",
       "      <td>0.436160</td>\n",
       "      <td>0.498768</td>\n",
       "      <td>0.150533</td>\n",
       "      <td>...</td>\n",
       "      <td>0.496166</td>\n",
       "      <td>0.448871</td>\n",
       "      <td>0.431382</td>\n",
       "      <td>0.311042</td>\n",
       "      <td>0.307172</td>\n",
       "      <td>0.555484</td>\n",
       "      <td>0.381493</td>\n",
       "      <td>0.442733</td>\n",
       "      <td>0.219484</td>\n",
       "      <td>0.208283</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Love</th>\n",
       "      <td>0.155450</td>\n",
       "      <td>0.110697</td>\n",
       "      <td>0.221559</td>\n",
       "      <td>0.216586</td>\n",
       "      <td>0.086326</td>\n",
       "      <td>0.238471</td>\n",
       "      <td>0.222353</td>\n",
       "      <td>0.117312</td>\n",
       "      <td>0.086395</td>\n",
       "      <td>0.460468</td>\n",
       "      <td>...</td>\n",
       "      <td>0.100014</td>\n",
       "      <td>0.255421</td>\n",
       "      <td>0.282122</td>\n",
       "      <td>0.120361</td>\n",
       "      <td>0.184177</td>\n",
       "      <td>0.136227</td>\n",
       "      <td>0.383620</td>\n",
       "      <td>0.327878</td>\n",
       "      <td>0.160328</td>\n",
       "      <td>0.187813</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Mystery</th>\n",
       "      <td>0.159139</td>\n",
       "      <td>0.206836</td>\n",
       "      <td>0.301968</td>\n",
       "      <td>0.304018</td>\n",
       "      <td>0.169842</td>\n",
       "      <td>0.295978</td>\n",
       "      <td>0.281376</td>\n",
       "      <td>0.146056</td>\n",
       "      <td>0.122446</td>\n",
       "      <td>0.535295</td>\n",
       "      <td>...</td>\n",
       "      <td>0.165335</td>\n",
       "      <td>0.264307</td>\n",
       "      <td>0.288518</td>\n",
       "      <td>0.174227</td>\n",
       "      <td>0.262591</td>\n",
       "      <td>0.074852</td>\n",
       "      <td>0.406492</td>\n",
       "      <td>0.353477</td>\n",
       "      <td>0.222961</td>\n",
       "      <td>0.256755</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Novel</th>\n",
       "      <td>0.153643</td>\n",
       "      <td>0.153492</td>\n",
       "      <td>0.165578</td>\n",
       "      <td>0.239096</td>\n",
       "      <td>0.173193</td>\n",
       "      <td>0.179180</td>\n",
       "      <td>0.164533</td>\n",
       "      <td>0.134749</td>\n",
       "      <td>0.154978</td>\n",
       "      <td>0.359460</td>\n",
       "      <td>...</td>\n",
       "      <td>0.165005</td>\n",
       "      <td>0.216203</td>\n",
       "      <td>0.224014</td>\n",
       "      <td>0.135500</td>\n",
       "      <td>0.133117</td>\n",
       "      <td>0.183557</td>\n",
       "      <td>0.347773</td>\n",
       "      <td>0.336905</td>\n",
       "      <td>0.105567</td>\n",
       "      <td>0.123045</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Political</th>\n",
       "      <td>0.143171</td>\n",
       "      <td>0.237443</td>\n",
       "      <td>0.242419</td>\n",
       "      <td>0.297211</td>\n",
       "      <td>0.205199</td>\n",
       "      <td>0.282515</td>\n",
       "      <td>0.251590</td>\n",
       "      <td>0.195155</td>\n",
       "      <td>0.154489</td>\n",
       "      <td>0.501549</td>\n",
       "      <td>...</td>\n",
       "      <td>0.194265</td>\n",
       "      <td>0.253997</td>\n",
       "      <td>0.275485</td>\n",
       "      <td>0.202040</td>\n",
       "      <td>0.248402</td>\n",
       "      <td>0.131558</td>\n",
       "      <td>0.333729</td>\n",
       "      <td>0.399106</td>\n",
       "      <td>0.198005</td>\n",
       "      <td>0.224784</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Psychological</th>\n",
       "      <td>0.147766</td>\n",
       "      <td>0.135806</td>\n",
       "      <td>0.284893</td>\n",
       "      <td>0.246437</td>\n",
       "      <td>0.092681</td>\n",
       "      <td>0.265408</td>\n",
       "      <td>0.274869</td>\n",
       "      <td>0.123851</td>\n",
       "      <td>0.074700</td>\n",
       "      <td>0.544965</td>\n",
       "      <td>...</td>\n",
       "      <td>0.098433</td>\n",
       "      <td>0.266838</td>\n",
       "      <td>0.296214</td>\n",
       "      <td>0.155976</td>\n",
       "      <td>0.225852</td>\n",
       "      <td>0.110495</td>\n",
       "      <td>0.429228</td>\n",
       "      <td>0.372260</td>\n",
       "      <td>0.218081</td>\n",
       "      <td>0.248149</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>SF</th>\n",
       "      <td>0.159006</td>\n",
       "      <td>0.298112</td>\n",
       "      <td>0.286170</td>\n",
       "      <td>0.342318</td>\n",
       "      <td>0.303312</td>\n",
       "      <td>0.220451</td>\n",
       "      <td>0.268585</td>\n",
       "      <td>0.197876</td>\n",
       "      <td>0.247244</td>\n",
       "      <td>0.475975</td>\n",
       "      <td>...</td>\n",
       "      <td>0.276053</td>\n",
       "      <td>0.099889</td>\n",
       "      <td>0.103091</td>\n",
       "      <td>0.247263</td>\n",
       "      <td>0.283008</td>\n",
       "      <td>0.216361</td>\n",
       "      <td>0.340824</td>\n",
       "      <td>0.396729</td>\n",
       "      <td>0.231634</td>\n",
       "      <td>0.263427</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Short stories</th>\n",
       "      <td>0.144126</td>\n",
       "      <td>0.141261</td>\n",
       "      <td>0.150531</td>\n",
       "      <td>0.207424</td>\n",
       "      <td>0.156345</td>\n",
       "      <td>0.161965</td>\n",
       "      <td>0.145205</td>\n",
       "      <td>0.107644</td>\n",
       "      <td>0.137585</td>\n",
       "      <td>0.332431</td>\n",
       "      <td>...</td>\n",
       "      <td>0.144735</td>\n",
       "      <td>0.185393</td>\n",
       "      <td>0.204511</td>\n",
       "      <td>0.068746</td>\n",
       "      <td>0.074144</td>\n",
       "      <td>0.192077</td>\n",
       "      <td>0.336958</td>\n",
       "      <td>0.284187</td>\n",
       "      <td>0.084417</td>\n",
       "      <td>0.093777</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Detective</th>\n",
       "      <td>0.192090</td>\n",
       "      <td>0.223515</td>\n",
       "      <td>0.273882</td>\n",
       "      <td>0.316194</td>\n",
       "      <td>0.239441</td>\n",
       "      <td>0.298537</td>\n",
       "      <td>0.241744</td>\n",
       "      <td>0.181511</td>\n",
       "      <td>0.185652</td>\n",
       "      <td>0.434352</td>\n",
       "      <td>...</td>\n",
       "      <td>0.207894</td>\n",
       "      <td>0.263558</td>\n",
       "      <td>0.276298</td>\n",
       "      <td>0.156829</td>\n",
       "      <td>0.214296</td>\n",
       "      <td>0.157145</td>\n",
       "      <td>0.384134</td>\n",
       "      <td>0.361163</td>\n",
       "      <td>0.166248</td>\n",
       "      <td>0.190798</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Fairy tales</th>\n",
       "      <td>0.628749</td>\n",
       "      <td>0.648252</td>\n",
       "      <td>0.488187</td>\n",
       "      <td>0.577640</td>\n",
       "      <td>0.688710</td>\n",
       "      <td>0.477085</td>\n",
       "      <td>0.532474</td>\n",
       "      <td>0.566366</td>\n",
       "      <td>0.671378</td>\n",
       "      <td>0.426414</td>\n",
       "      <td>...</td>\n",
       "      <td>0.659239</td>\n",
       "      <td>0.638714</td>\n",
       "      <td>0.642966</td>\n",
       "      <td>0.574243</td>\n",
       "      <td>0.535119</td>\n",
       "      <td>0.727998</td>\n",
       "      <td>0.690327</td>\n",
       "      <td>0.674160</td>\n",
       "      <td>0.449376</td>\n",
       "      <td>0.444713</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Fantasy</th>\n",
       "      <td>0.200923</td>\n",
       "      <td>0.252037</td>\n",
       "      <td>0.184323</td>\n",
       "      <td>0.282119</td>\n",
       "      <td>0.272366</td>\n",
       "      <td>0.118681</td>\n",
       "      <td>0.186989</td>\n",
       "      <td>0.146308</td>\n",
       "      <td>0.238908</td>\n",
       "      <td>0.347899</td>\n",
       "      <td>...</td>\n",
       "      <td>0.258900</td>\n",
       "      <td>0.170259</td>\n",
       "      <td>0.194883</td>\n",
       "      <td>0.164046</td>\n",
       "      <td>0.188593</td>\n",
       "      <td>0.285030</td>\n",
       "      <td>0.380742</td>\n",
       "      <td>0.323726</td>\n",
       "      <td>0.149265</td>\n",
       "      <td>0.157471</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: History</th>\n",
       "      <td>0.315313</td>\n",
       "      <td>0.399004</td>\n",
       "      <td>0.157203</td>\n",
       "      <td>0.318049</td>\n",
       "      <td>0.463290</td>\n",
       "      <td>0.279166</td>\n",
       "      <td>0.167043</td>\n",
       "      <td>0.353593</td>\n",
       "      <td>0.436083</td>\n",
       "      <td>0.216644</td>\n",
       "      <td>...</td>\n",
       "      <td>0.415784</td>\n",
       "      <td>0.370638</td>\n",
       "      <td>0.365888</td>\n",
       "      <td>0.288250</td>\n",
       "      <td>0.245317</td>\n",
       "      <td>0.452374</td>\n",
       "      <td>0.258764</td>\n",
       "      <td>0.403887</td>\n",
       "      <td>0.149322</td>\n",
       "      <td>0.147591</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Horror</th>\n",
       "      <td>0.194066</td>\n",
       "      <td>0.225984</td>\n",
       "      <td>0.202702</td>\n",
       "      <td>0.261670</td>\n",
       "      <td>0.263250</td>\n",
       "      <td>0.169431</td>\n",
       "      <td>0.191359</td>\n",
       "      <td>0.110396</td>\n",
       "      <td>0.232870</td>\n",
       "      <td>0.324789</td>\n",
       "      <td>...</td>\n",
       "      <td>0.220425</td>\n",
       "      <td>0.180314</td>\n",
       "      <td>0.191460</td>\n",
       "      <td>0.130912</td>\n",
       "      <td>0.131449</td>\n",
       "      <td>0.258936</td>\n",
       "      <td>0.385552</td>\n",
       "      <td>0.337876</td>\n",
       "      <td>0.100753</td>\n",
       "      <td>0.099603</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Humor</th>\n",
       "      <td>0.308312</td>\n",
       "      <td>0.337807</td>\n",
       "      <td>0.283597</td>\n",
       "      <td>0.325308</td>\n",
       "      <td>0.389157</td>\n",
       "      <td>0.373639</td>\n",
       "      <td>0.288126</td>\n",
       "      <td>0.346928</td>\n",
       "      <td>0.318433</td>\n",
       "      <td>0.304693</td>\n",
       "      <td>...</td>\n",
       "      <td>0.354714</td>\n",
       "      <td>0.365372</td>\n",
       "      <td>0.375894</td>\n",
       "      <td>0.199569</td>\n",
       "      <td>0.227459</td>\n",
       "      <td>0.419920</td>\n",
       "      <td>0.442679</td>\n",
       "      <td>0.400409</td>\n",
       "      <td>0.183328</td>\n",
       "      <td>0.165591</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Juvenile</th>\n",
       "      <td>0.320457</td>\n",
       "      <td>0.387811</td>\n",
       "      <td>0.251899</td>\n",
       "      <td>0.378295</td>\n",
       "      <td>0.461253</td>\n",
       "      <td>0.311534</td>\n",
       "      <td>0.226089</td>\n",
       "      <td>0.389053</td>\n",
       "      <td>0.441437</td>\n",
       "      <td>0.148339</td>\n",
       "      <td>...</td>\n",
       "      <td>0.436986</td>\n",
       "      <td>0.372400</td>\n",
       "      <td>0.345339</td>\n",
       "      <td>0.276347</td>\n",
       "      <td>0.247498</td>\n",
       "      <td>0.496181</td>\n",
       "      <td>0.338266</td>\n",
       "      <td>0.375895</td>\n",
       "      <td>0.190574</td>\n",
       "      <td>0.186396</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Man-woman</th>\n",
       "      <td>0.178036</td>\n",
       "      <td>0.156228</td>\n",
       "      <td>0.258573</td>\n",
       "      <td>0.271262</td>\n",
       "      <td>0.140270</td>\n",
       "      <td>0.274196</td>\n",
       "      <td>0.250362</td>\n",
       "      <td>0.141327</td>\n",
       "      <td>0.108213</td>\n",
       "      <td>0.496166</td>\n",
       "      <td>...</td>\n",
       "      <td>0.090782</td>\n",
       "      <td>0.273947</td>\n",
       "      <td>0.299119</td>\n",
       "      <td>0.153163</td>\n",
       "      <td>0.208536</td>\n",
       "      <td>0.167142</td>\n",
       "      <td>0.430001</td>\n",
       "      <td>0.388016</td>\n",
       "      <td>0.179643</td>\n",
       "      <td>0.203911</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: SF, American</th>\n",
       "      <td>0.179597</td>\n",
       "      <td>0.295572</td>\n",
       "      <td>0.264854</td>\n",
       "      <td>0.332135</td>\n",
       "      <td>0.307747</td>\n",
       "      <td>0.194761</td>\n",
       "      <td>0.255819</td>\n",
       "      <td>0.175446</td>\n",
       "      <td>0.255216</td>\n",
       "      <td>0.448871</td>\n",
       "      <td>...</td>\n",
       "      <td>0.273947</td>\n",
       "      <td>0.030278</td>\n",
       "      <td>0.041155</td>\n",
       "      <td>0.196287</td>\n",
       "      <td>0.237414</td>\n",
       "      <td>0.253581</td>\n",
       "      <td>0.366562</td>\n",
       "      <td>0.363445</td>\n",
       "      <td>0.209545</td>\n",
       "      <td>0.227276</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: SF, Other</th>\n",
       "      <td>0.195417</td>\n",
       "      <td>0.318653</td>\n",
       "      <td>0.279647</td>\n",
       "      <td>0.361468</td>\n",
       "      <td>0.332503</td>\n",
       "      <td>0.217714</td>\n",
       "      <td>0.262631</td>\n",
       "      <td>0.206549</td>\n",
       "      <td>0.283067</td>\n",
       "      <td>0.431382</td>\n",
       "      <td>...</td>\n",
       "      <td>0.299119</td>\n",
       "      <td>0.041155</td>\n",
       "      <td>0.042934</td>\n",
       "      <td>0.223805</td>\n",
       "      <td>0.244213</td>\n",
       "      <td>0.275171</td>\n",
       "      <td>0.357406</td>\n",
       "      <td>0.389982</td>\n",
       "      <td>0.209787</td>\n",
       "      <td>0.227908</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Short stories, American</th>\n",
       "      <td>0.140443</td>\n",
       "      <td>0.123164</td>\n",
       "      <td>0.160999</td>\n",
       "      <td>0.214116</td>\n",
       "      <td>0.144971</td>\n",
       "      <td>0.211413</td>\n",
       "      <td>0.147539</td>\n",
       "      <td>0.137688</td>\n",
       "      <td>0.126966</td>\n",
       "      <td>0.311042</td>\n",
       "      <td>...</td>\n",
       "      <td>0.153163</td>\n",
       "      <td>0.196287</td>\n",
       "      <td>0.223805</td>\n",
       "      <td>0.030228</td>\n",
       "      <td>0.097183</td>\n",
       "      <td>0.205100</td>\n",
       "      <td>0.331762</td>\n",
       "      <td>0.221426</td>\n",
       "      <td>0.095434</td>\n",
       "      <td>0.099133</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Subj: Short stories, Other</th>\n",
       "      <td>0.206562</td>\n",
       "      <td>0.171966</td>\n",
       "      <td>0.154715</td>\n",
       "      <td>0.245587</td>\n",
       "      <td>0.211775</td>\n",
       "      <td>0.219323</td>\n",
       "      <td>0.148783</td>\n",
       "      <td>0.185575</td>\n",
       "      <td>0.210300</td>\n",
       "      <td>0.307172</td>\n",
       "      <td>...</td>\n",
       "      <td>0.208536</td>\n",
       "      <td>0.237414</td>\n",
       "      <td>0.244213</td>\n",
       "      <td>0.097183</td>\n",
       "      <td>0.049202</td>\n",
       "      <td>0.274363</td>\n",
       "      <td>0.328212</td>\n",
       "      <td>0.317023</td>\n",
       "      <td>0.084314</td>\n",
       "      <td>0.085037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Suspense</th>\n",
       "      <td>0.131355</td>\n",
       "      <td>0.224473</td>\n",
       "      <td>0.304745</td>\n",
       "      <td>0.300240</td>\n",
       "      <td>0.174811</td>\n",
       "      <td>0.290014</td>\n",
       "      <td>0.284327</td>\n",
       "      <td>0.135654</td>\n",
       "      <td>0.124569</td>\n",
       "      <td>0.555484</td>\n",
       "      <td>...</td>\n",
       "      <td>0.167142</td>\n",
       "      <td>0.253581</td>\n",
       "      <td>0.275171</td>\n",
       "      <td>0.205100</td>\n",
       "      <td>0.274363</td>\n",
       "      <td>0.062647</td>\n",
       "      <td>0.374662</td>\n",
       "      <td>0.386832</td>\n",
       "      <td>0.236303</td>\n",
       "      <td>0.275541</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>War</th>\n",
       "      <td>0.259147</td>\n",
       "      <td>0.418203</td>\n",
       "      <td>0.303012</td>\n",
       "      <td>0.432316</td>\n",
       "      <td>0.435761</td>\n",
       "      <td>0.369003</td>\n",
       "      <td>0.242472</td>\n",
       "      <td>0.398639</td>\n",
       "      <td>0.410975</td>\n",
       "      <td>0.381493</td>\n",
       "      <td>...</td>\n",
       "      <td>0.430001</td>\n",
       "      <td>0.366562</td>\n",
       "      <td>0.357406</td>\n",
       "      <td>0.331762</td>\n",
       "      <td>0.328212</td>\n",
       "      <td>0.374662</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.403101</td>\n",
       "      <td>0.266369</td>\n",
       "      <td>0.298906</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>Western</th>\n",
       "      <td>0.286835</td>\n",
       "      <td>0.344071</td>\n",
       "      <td>0.282809</td>\n",
       "      <td>0.365158</td>\n",
       "      <td>0.347907</td>\n",
       "      <td>0.315618</td>\n",
       "      <td>0.243358</td>\n",
       "      <td>0.329349</td>\n",
       "      <td>0.342050</td>\n",
       "      <td>0.442733</td>\n",
       "      <td>...</td>\n",
       "      <td>0.388016</td>\n",
       "      <td>0.363445</td>\n",
       "      <td>0.389982</td>\n",
       "      <td>0.221426</td>\n",
       "      <td>0.317023</td>\n",
       "      <td>0.386832</td>\n",
       "      <td>0.403101</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.309861</td>\n",
       "      <td>0.308590</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>randomA</th>\n",
       "      <td>0.157745</td>\n",
       "      <td>0.171014</td>\n",
       "      <td>0.110679</td>\n",
       "      <td>0.193530</td>\n",
       "      <td>0.215681</td>\n",
       "      <td>0.182390</td>\n",
       "      <td>0.107812</td>\n",
       "      <td>0.146107</td>\n",
       "      <td>0.191327</td>\n",
       "      <td>0.219484</td>\n",
       "      <td>...</td>\n",
       "      <td>0.179643</td>\n",
       "      <td>0.209545</td>\n",
       "      <td>0.209787</td>\n",
       "      <td>0.095434</td>\n",
       "      <td>0.084314</td>\n",
       "      <td>0.236303</td>\n",
       "      <td>0.266369</td>\n",
       "      <td>0.309861</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.017277</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>randomB</th>\n",
       "      <td>0.183035</td>\n",
       "      <td>0.192145</td>\n",
       "      <td>0.115196</td>\n",
       "      <td>0.205755</td>\n",
       "      <td>0.244359</td>\n",
       "      <td>0.193357</td>\n",
       "      <td>0.113900</td>\n",
       "      <td>0.168549</td>\n",
       "      <td>0.215768</td>\n",
       "      <td>0.208283</td>\n",
       "      <td>...</td>\n",
       "      <td>0.203911</td>\n",
       "      <td>0.227276</td>\n",
       "      <td>0.227908</td>\n",
       "      <td>0.099133</td>\n",
       "      <td>0.085037</td>\n",
       "      <td>0.275541</td>\n",
       "      <td>0.298906</td>\n",
       "      <td>0.308590</td>\n",
       "      <td>0.017277</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>34 rows × 34 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                               Adventure  Bildungsroman  Biographical  \\\n",
       "Adventure                            NaN       0.215406      0.199636   \n",
       "Bildungsroman                   0.215406       0.089303      0.220546   \n",
       "Biographical                    0.199636       0.220546           NaN   \n",
       "Christian                       0.260327       0.249257      0.235848   \n",
       "Domestic                        0.206770       0.110307      0.271429   \n",
       "Fantasy                         0.190834       0.282142      0.166882   \n",
       "Historical                      0.170435       0.214896      0.086023   \n",
       "Horror                          0.138244       0.177737      0.212154   \n",
       "Humor                           0.148603       0.136661      0.257784   \n",
       "Juvenile                        0.383608       0.446081      0.286990   \n",
       "Love                            0.155450       0.110697      0.221559   \n",
       "Mystery                         0.159139       0.206836      0.301968   \n",
       "Novel                           0.153643       0.153492      0.165578   \n",
       "Political                       0.143171       0.237443      0.242419   \n",
       "Psychological                   0.147766       0.135806      0.284893   \n",
       "SF                              0.159006       0.298112      0.286170   \n",
       "Short stories                   0.144126       0.141261      0.150531   \n",
       "Subj: Detective                 0.192090       0.223515      0.273882   \n",
       "Subj: Fairy tales               0.628749       0.648252      0.488187   \n",
       "Subj: Fantasy                   0.200923       0.252037      0.184323   \n",
       "Subj: History                   0.315313       0.399004      0.157203   \n",
       "Subj: Horror                    0.194066       0.225984      0.202702   \n",
       "Subj: Humor                     0.308312       0.337807      0.283597   \n",
       "Subj: Juvenile                  0.320457       0.387811      0.251899   \n",
       "Subj: Man-woman                 0.178036       0.156228      0.258573   \n",
       "Subj: SF, American              0.179597       0.295572      0.264854   \n",
       "Subj: SF, Other                 0.195417       0.318653      0.279647   \n",
       "Subj: Short stories, American   0.140443       0.123164      0.160999   \n",
       "Subj: Short stories, Other      0.206562       0.171966      0.154715   \n",
       "Suspense                        0.131355       0.224473      0.304745   \n",
       "War                             0.259147       0.418203      0.303012   \n",
       "Western                         0.286835       0.344071      0.282809   \n",
       "randomA                         0.157745       0.171014      0.110679   \n",
       "randomB                         0.183035       0.192145      0.115196   \n",
       "\n",
       "                               Christian  Domestic   Fantasy  Historical  \\\n",
       "Adventure                       0.260327  0.206770  0.190834    0.170435   \n",
       "Bildungsroman                   0.249257  0.110307  0.282142    0.214896   \n",
       "Biographical                    0.235848  0.271429  0.166882    0.086023   \n",
       "Christian                            NaN  0.251991  0.261536    0.233791   \n",
       "Domestic                        0.251991  0.084297  0.288123    0.262960   \n",
       "Fantasy                         0.261536  0.288123  0.071596    0.166720   \n",
       "Historical                      0.233791  0.262960  0.166720    0.074212   \n",
       "Horror                          0.238892  0.161963  0.160623    0.214511   \n",
       "Humor                           0.237135  0.101590  0.266337    0.251978   \n",
       "Juvenile                        0.392191  0.525196  0.372555    0.284545   \n",
       "Love                            0.216586  0.086326  0.238471    0.222353   \n",
       "Mystery                         0.304018  0.169842  0.295978    0.281376   \n",
       "Novel                           0.239096  0.173193  0.179180    0.164533   \n",
       "Political                       0.297211  0.205199  0.282515    0.251590   \n",
       "Psychological                   0.246437  0.092681  0.265408    0.274869   \n",
       "SF                              0.342318  0.303312  0.220451    0.268585   \n",
       "Short stories                   0.207424  0.156345  0.161965    0.145205   \n",
       "Subj: Detective                 0.316194  0.239441  0.298537    0.241744   \n",
       "Subj: Fairy tales               0.577640  0.688710  0.477085    0.532474   \n",
       "Subj: Fantasy                   0.282119  0.272366  0.118681    0.186989   \n",
       "Subj: History                   0.318049  0.463290  0.279166    0.167043   \n",
       "Subj: Horror                    0.261670  0.263250  0.169431    0.191359   \n",
       "Subj: Humor                     0.325308  0.389157  0.373639    0.288126   \n",
       "Subj: Juvenile                  0.378295  0.461253  0.311534    0.226089   \n",
       "Subj: Man-woman                 0.271262  0.140270  0.274196    0.250362   \n",
       "Subj: SF, American              0.332135  0.307747  0.194761    0.255819   \n",
       "Subj: SF, Other                 0.361468  0.332503  0.217714    0.262631   \n",
       "Subj: Short stories, American   0.214116  0.144971  0.211413    0.147539   \n",
       "Subj: Short stories, Other      0.245587  0.211775  0.219323    0.148783   \n",
       "Suspense                        0.300240  0.174811  0.290014    0.284327   \n",
       "War                             0.432316  0.435761  0.369003    0.242472   \n",
       "Western                         0.365158  0.347907  0.315618    0.243358   \n",
       "randomA                         0.193530  0.215681  0.182390    0.107812   \n",
       "randomB                         0.205755  0.244359  0.193357    0.113900   \n",
       "\n",
       "                                 Horror     Humor  Juvenile    ...     \\\n",
       "Adventure                      0.138244  0.148603  0.383608    ...      \n",
       "Bildungsroman                  0.177737  0.136661  0.446081    ...      \n",
       "Biographical                   0.212154  0.257784  0.286990    ...      \n",
       "Christian                      0.238892  0.237135  0.392191    ...      \n",
       "Domestic                       0.161963  0.101590  0.525196    ...      \n",
       "Fantasy                        0.160623  0.266337  0.372555    ...      \n",
       "Historical                     0.214511  0.251978  0.284545    ...      \n",
       "Horror                              NaN  0.135872  0.436160    ...      \n",
       "Humor                          0.135872  0.082600  0.498768    ...      \n",
       "Juvenile                       0.436160  0.498768  0.150533    ...      \n",
       "Love                           0.117312  0.086395  0.460468    ...      \n",
       "Mystery                        0.146056  0.122446  0.535295    ...      \n",
       "Novel                          0.134749  0.154978  0.359460    ...      \n",
       "Political                      0.195155  0.154489  0.501549    ...      \n",
       "Psychological                  0.123851  0.074700  0.544965    ...      \n",
       "SF                             0.197876  0.247244  0.475975    ...      \n",
       "Short stories                  0.107644  0.137585  0.332431    ...      \n",
       "Subj: Detective                0.181511  0.185652  0.434352    ...      \n",
       "Subj: Fairy tales              0.566366  0.671378  0.426414    ...      \n",
       "Subj: Fantasy                  0.146308  0.238908  0.347899    ...      \n",
       "Subj: History                  0.353593  0.436083  0.216644    ...      \n",
       "Subj: Horror                   0.110396  0.232870  0.324789    ...      \n",
       "Subj: Humor                    0.346928  0.318433  0.304693    ...      \n",
       "Subj: Juvenile                 0.389053  0.441437  0.148339    ...      \n",
       "Subj: Man-woman                0.141327  0.108213  0.496166    ...      \n",
       "Subj: SF, American             0.175446  0.255216  0.448871    ...      \n",
       "Subj: SF, Other                0.206549  0.283067  0.431382    ...      \n",
       "Subj: Short stories, American  0.137688  0.126966  0.311042    ...      \n",
       "Subj: Short stories, Other     0.185575  0.210300  0.307172    ...      \n",
       "Suspense                       0.135654  0.124569  0.555484    ...      \n",
       "War                            0.398639  0.410975  0.381493    ...      \n",
       "Western                        0.329349  0.342050  0.442733    ...      \n",
       "randomA                        0.146107  0.191327  0.219484    ...      \n",
       "randomB                        0.168549  0.215768  0.208283    ...      \n",
       "\n",
       "                               Subj: Man-woman  Subj: SF, American  \\\n",
       "Adventure                             0.178036            0.179597   \n",
       "Bildungsroman                         0.156228            0.295572   \n",
       "Biographical                          0.258573            0.264854   \n",
       "Christian                             0.271262            0.332135   \n",
       "Domestic                              0.140270            0.307747   \n",
       "Fantasy                               0.274196            0.194761   \n",
       "Historical                            0.250362            0.255819   \n",
       "Horror                                0.141327            0.175446   \n",
       "Humor                                 0.108213            0.255216   \n",
       "Juvenile                              0.496166            0.448871   \n",
       "Love                                  0.100014            0.255421   \n",
       "Mystery                               0.165335            0.264307   \n",
       "Novel                                 0.165005            0.216203   \n",
       "Political                             0.194265            0.253997   \n",
       "Psychological                         0.098433            0.266838   \n",
       "SF                                    0.276053            0.099889   \n",
       "Short stories                         0.144735            0.185393   \n",
       "Subj: Detective                       0.207894            0.263558   \n",
       "Subj: Fairy tales                     0.659239            0.638714   \n",
       "Subj: Fantasy                         0.258900            0.170259   \n",
       "Subj: History                         0.415784            0.370638   \n",
       "Subj: Horror                          0.220425            0.180314   \n",
       "Subj: Humor                           0.354714            0.365372   \n",
       "Subj: Juvenile                        0.436986            0.372400   \n",
       "Subj: Man-woman                       0.090782            0.273947   \n",
       "Subj: SF, American                    0.273947            0.030278   \n",
       "Subj: SF, Other                       0.299119            0.041155   \n",
       "Subj: Short stories, American         0.153163            0.196287   \n",
       "Subj: Short stories, Other            0.208536            0.237414   \n",
       "Suspense                              0.167142            0.253581   \n",
       "War                                   0.430001            0.366562   \n",
       "Western                               0.388016            0.363445   \n",
       "randomA                               0.179643            0.209545   \n",
       "randomB                               0.203911            0.227276   \n",
       "\n",
       "                               Subj: SF, Other  Subj: Short stories, American  \\\n",
       "Adventure                             0.195417                       0.140443   \n",
       "Bildungsroman                         0.318653                       0.123164   \n",
       "Biographical                          0.279647                       0.160999   \n",
       "Christian                             0.361468                       0.214116   \n",
       "Domestic                              0.332503                       0.144971   \n",
       "Fantasy                               0.217714                       0.211413   \n",
       "Historical                            0.262631                       0.147539   \n",
       "Horror                                0.206549                       0.137688   \n",
       "Humor                                 0.283067                       0.126966   \n",
       "Juvenile                              0.431382                       0.311042   \n",
       "Love                                  0.282122                       0.120361   \n",
       "Mystery                               0.288518                       0.174227   \n",
       "Novel                                 0.224014                       0.135500   \n",
       "Political                             0.275485                       0.202040   \n",
       "Psychological                         0.296214                       0.155976   \n",
       "SF                                    0.103091                       0.247263   \n",
       "Short stories                         0.204511                       0.068746   \n",
       "Subj: Detective                       0.276298                       0.156829   \n",
       "Subj: Fairy tales                     0.642966                       0.574243   \n",
       "Subj: Fantasy                         0.194883                       0.164046   \n",
       "Subj: History                         0.365888                       0.288250   \n",
       "Subj: Horror                          0.191460                       0.130912   \n",
       "Subj: Humor                           0.375894                       0.199569   \n",
       "Subj: Juvenile                        0.345339                       0.276347   \n",
       "Subj: Man-woman                       0.299119                       0.153163   \n",
       "Subj: SF, American                    0.041155                       0.196287   \n",
       "Subj: SF, Other                       0.042934                       0.223805   \n",
       "Subj: Short stories, American         0.223805                       0.030228   \n",
       "Subj: Short stories, Other            0.244213                       0.097183   \n",
       "Suspense                              0.275171                       0.205100   \n",
       "War                                   0.357406                       0.331762   \n",
       "Western                               0.389982                       0.221426   \n",
       "randomA                               0.209787                       0.095434   \n",
       "randomB                               0.227908                       0.099133   \n",
       "\n",
       "                               Subj: Short stories, Other  Suspense       War  \\\n",
       "Adventure                                        0.206562  0.131355  0.259147   \n",
       "Bildungsroman                                    0.171966  0.224473  0.418203   \n",
       "Biographical                                     0.154715  0.304745  0.303012   \n",
       "Christian                                        0.245587  0.300240  0.432316   \n",
       "Domestic                                         0.211775  0.174811  0.435761   \n",
       "Fantasy                                          0.219323  0.290014  0.369003   \n",
       "Historical                                       0.148783  0.284327  0.242472   \n",
       "Horror                                           0.185575  0.135654  0.398639   \n",
       "Humor                                            0.210300  0.124569  0.410975   \n",
       "Juvenile                                         0.307172  0.555484  0.381493   \n",
       "Love                                             0.184177  0.136227  0.383620   \n",
       "Mystery                                          0.262591  0.074852  0.406492   \n",
       "Novel                                            0.133117  0.183557  0.347773   \n",
       "Political                                        0.248402  0.131558  0.333729   \n",
       "Psychological                                    0.225852  0.110495  0.429228   \n",
       "SF                                               0.283008  0.216361  0.340824   \n",
       "Short stories                                    0.074144  0.192077  0.336958   \n",
       "Subj: Detective                                  0.214296  0.157145  0.384134   \n",
       "Subj: Fairy tales                                0.535119  0.727998  0.690327   \n",
       "Subj: Fantasy                                    0.188593  0.285030  0.380742   \n",
       "Subj: History                                    0.245317  0.452374  0.258764   \n",
       "Subj: Horror                                     0.131449  0.258936  0.385552   \n",
       "Subj: Humor                                      0.227459  0.419920  0.442679   \n",
       "Subj: Juvenile                                   0.247498  0.496181  0.338266   \n",
       "Subj: Man-woman                                  0.208536  0.167142  0.430001   \n",
       "Subj: SF, American                               0.237414  0.253581  0.366562   \n",
       "Subj: SF, Other                                  0.244213  0.275171  0.357406   \n",
       "Subj: Short stories, American                    0.097183  0.205100  0.331762   \n",
       "Subj: Short stories, Other                       0.049202  0.274363  0.328212   \n",
       "Suspense                                         0.274363  0.062647  0.374662   \n",
       "War                                              0.328212  0.374662       NaN   \n",
       "Western                                          0.317023  0.386832  0.403101   \n",
       "randomA                                          0.084314  0.236303  0.266369   \n",
       "randomB                                          0.085037  0.275541  0.298906   \n",
       "\n",
       "                                Western   randomA   randomB  \n",
       "Adventure                      0.286835  0.157745  0.183035  \n",
       "Bildungsroman                  0.344071  0.171014  0.192145  \n",
       "Biographical                   0.282809  0.110679  0.115196  \n",
       "Christian                      0.365158  0.193530  0.205755  \n",
       "Domestic                       0.347907  0.215681  0.244359  \n",
       "Fantasy                        0.315618  0.182390  0.193357  \n",
       "Historical                     0.243358  0.107812  0.113900  \n",
       "Horror                         0.329349  0.146107  0.168549  \n",
       "Humor                          0.342050  0.191327  0.215768  \n",
       "Juvenile                       0.442733  0.219484  0.208283  \n",
       "Love                           0.327878  0.160328  0.187813  \n",
       "Mystery                        0.353477  0.222961  0.256755  \n",
       "Novel                          0.336905  0.105567  0.123045  \n",
       "Political                      0.399106  0.198005  0.224784  \n",
       "Psychological                  0.372260  0.218081  0.248149  \n",
       "SF                             0.396729  0.231634  0.263427  \n",
       "Short stories                  0.284187  0.084417  0.093777  \n",
       "Subj: Detective                0.361163  0.166248  0.190798  \n",
       "Subj: Fairy tales              0.674160  0.449376  0.444713  \n",
       "Subj: Fantasy                  0.323726  0.149265  0.157471  \n",
       "Subj: History                  0.403887  0.149322  0.147591  \n",
       "Subj: Horror                   0.337876  0.100753  0.099603  \n",
       "Subj: Humor                    0.400409  0.183328  0.165591  \n",
       "Subj: Juvenile                 0.375895  0.190574  0.186396  \n",
       "Subj: Man-woman                0.388016  0.179643  0.203911  \n",
       "Subj: SF, American             0.363445  0.209545  0.227276  \n",
       "Subj: SF, Other                0.389982  0.209787  0.227908  \n",
       "Subj: Short stories, American  0.221426  0.095434  0.099133  \n",
       "Subj: Short stories, Other     0.317023  0.084314  0.085037  \n",
       "Suspense                       0.386832  0.236303  0.275541  \n",
       "War                            0.403101  0.266369  0.298906  \n",
       "Western                             NaN  0.309861  0.308590  \n",
       "randomA                        0.309861       NaN  0.017277  \n",
       "randomB                        0.308590  0.017277       NaN  \n",
       "\n",
       "[34 rows x 34 columns]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tfidf_matrix = pd.DataFrame(cosdist)\n",
    "tfidf_matrix"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Compare cosine distance to evidence about social distance\n",
    "\n",
    "we'll skip the NaNs and also ignore ```randomA``` and ```randomB.```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "social = pd.read_csv('../socialmeasures/pmidf.csv', index_col = 'index')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(-0.25170840418984103, 1.3202157996454919e-08) n = 496\n"
     ]
    }
   ],
   "source": [
    "def compare_to_social(socialdf, otherdf):\n",
    "    ''' Compares two DataFrames, ignoring cells as instructed,\n",
    "    and exporting the results in two vectors for correlation.\n",
    "    '''\n",
    "\n",
    "    socialvals = []\n",
    "    othervals = []\n",
    "    comparisons = []\n",
    "    \n",
    "    indexlist = socialdf.index.tolist()\n",
    "\n",
    "    for seq, idx1 in enumerate(indexlist):\n",
    "        for idx2 in indexlist[seq + 1: ]:\n",
    "            if idx1 not in otherdf.index or idx2 not in otherdf.index:\n",
    "                continue\n",
    "                \n",
    "            otherval = otherdf.loc[idx1, idx2]\n",
    "            if pd.isnull(otherval):\n",
    "                continue\n",
    "            else:\n",
    "                sval = socialdf.loc[idx1, idx2]\n",
    "                socialvals.append(sval)\n",
    "                othervals.append(otherval)\n",
    "                comparisons.append((idx1, idx2))\n",
    "                \n",
    "    return socialvals, othervals, comparisons\n",
    "\n",
    "socialvals, tfidfvals, comparisons = compare_to_social(social, tfidf_matrix)\n",
    "print(pearsonr(socialvals, tfidfvals), 'n =', str(len(comparisons)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAEACAYAAAC08h1NAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+UHNdV579P/fvnzGgzGLCjaVnSSLIl2aMg8ELAoxBB\nkt04BmKMfRZOsBLs+Aj5hOzikZMgB0UQoWO8q7NxZAUtA3v0Y4Bssgkkaexlxnu0S7ad+IdCxgaH\nMMJJgG6yiw/GsiXbd/94VV2/Xv3qrqqunr6fc+ZI01Nddet11/fdd9999wkiAsMwDDM6rBm0AQzD\nMEyysPAzDMOMGCz8DMMwIwYLP8MwzIjBws8wDDNisPAzDMOMGLELvxBiTAjxh0KIZ4QQXxdC/FDc\n12QYhmHcySZwjf8E4AtEdIsQIgugnMA1GYZhGBdEnAu4hBB1AE8S0YbYLsIwDMOEIu5Qz3oA/yiE\n+F0hxBNCiBNCiFLM12QYhmE8iFv4swB2AvgEEe0E8BKAuZivyTAMw3gQd4z/WwCeJ6KvaL//EYB7\nzQcIIbhYEMMwTA8QkejlfbF6/ET0DwCeF0JMay/9OIBlxXGp/zl48ODAbWA72c5htnMYbBwmO/sh\niaye/QBOCSFyAL4J4BcTuCbDMAzjQuzCT0RPA9gV93UYhmGYYPDK3YDMzs4O2oRAsJ3RwnZGxzDY\nCAyPnf0Qax5/IAOEoEHbwDAMM2wIIUBpnNxlGIZh0gcLP8MwzIjBws8wDDNisPAzDMOMGCz8DMMw\nIwYLP8MwzIjBws8wDDNisPAzDMOMGCz8DMMwIwYLP8MwzIjBws8wDDNisPAzDMOMGCz8DMMwIwYL\nf490Oh08/vjj6HQ6gzaFYRgmFCz8PXDmzAKmprZgz567MDW1BWfOLAzaJIZhmMBwPf6AdDodrKys\noFqt4k1vejMuXlwEsAPAeZRKu3HhwrOYnJwctJkMw4wII1+PP+6wi9nDn5n5YQBjkKIPADuQy01h\nZWUllmszDMNETezCL4RYI4R4QgjxuTjOH3fYpdPpYO/eu3Hx4iJeeOGreOWVx3Dx4t8DWNKOOI/L\nly+g0WhEet1RhudPGCZekvD47wGwHMeJ7aJ88eIi9u69O1LBWFlZQTY7BbOHXyptQKHwLtTrO1Eq\n7caDD34cKysrLFQRwPMnDBM/sQq/EOIqAO8A8DtxnH9lZQX5fANxhl2eeOIp/PM/PwvgvPbKeQDf\nwZNPfhmPPvowHnzw4/jAB+ZYqCIgiY6cYZj4Pf4HAfwHALHM3jYaDVy6tAKzKEcZdul0OvjAB+YA\n3A9gN4DrANyABx/8OLZu3YpGo4EPfGCOhSoikujIGYaJUfiFEP8GwD8Q0VMAhPYTKZOTkzh58iGU\nSru7YZeTJx+KLLvGEKJfBfAsgN9BtboBO3deb/s7C1UUxN2RMwwjycZ47h8BcJMQ4h0ASgBqQojf\nJ6JfsB94//33d/8/OzuL2dnZwBe57bZb8da3vgUrKytoNBqRplQ2Gg288so3AZwGsAdAAa+99p2u\nEFmFSqZ2slD1jt6R7927G7ncFC5fvhBpR84ww8zS0hKWlpYiOVciefxCiBsBfJCIblL8LbV5/GfO\nLOA977kTly5NAvgOcrk1+L3f+x3cdtutlmP27r3bIlTmvzPh0ddMRN2RM8xqop88fhZ+FzqdDqam\ntgRaqMVCxTBM0vQj/HGGeroQ0WMAHkviWlGhx+8vXnTG7+3iPjk5yYLPMMzQsCpW7sYBTzQyDLNa\nYeF3Ie6MIYZhmEHBRdp86HQ6ePLJJwEAMzMzLPwMw6SCkS/SFiePPvpnuPnm2/CzP3uAV+YyDLMq\nGDmP31xe+cUXX/TMxAmT2RPkmpz1wzBMVLDHHxC9ANiNN/4crrnmTbjxxr2eXnwUK3OHpegYV8Rk\nmBGCiAb6I02In3a7TaXSWgIWCVhLwNMEEAFPU6m0ltrttsd7/I/1vqb1/cvLy9RqtQKfJ25Onz5L\npdJaGhvbSaXSWjp9+uygTWIYxgdNO3vS3ZHx+A3vvQKggSBefL+ZPaoRA/D9mJm5wXMEkKT3zRUx\nGWb0GBnhN/Ly/wXACoLm59922624cOFZPProw7hw4dlQ5RhUawEuXvxrvPLKf3cV2aRDQ1xojmFG\nkF6HClH9IKFQD5ER0igWGwSUqFTaFntoQ79mvT5DhcI4lUrrtbCP/KnXZ6jVahFR/6GlXhjENRmG\n6R/0EepJpGRDWjBX8gyS1RPHNd/0pjfDrZpnmDIRUcEVMRlm9Bi5dM5B41XN05k+uoRC4V148skv\nY+vWrbHaZU45BcDppwyTclJfndPTgBETfsA7r1/vGIjG8PLLf49SaSOAbydW7lm/fj4v5ye4zDTD\npBMW/lXGM888g5mZH8YrrzyGfhaOhSWqBWsMw8QPL+BKkCRSLV988UUUi1cj6UybKDN8eEEYw6QX\nFv4QJJVqOaiS0EGuG0TQh2W1MsOMLL2mA0X1gwTTOfsh6bRHcxpokqtpva4bZIUvp4cyTDKgj3RO\njvEH5PHHH8eePXfhhRe+2n2tXt+JRx99GLt27YrlmkGLu0VdBE51vqDxf6OdvgS5UK6Bev0nY22n\nQcCF95hBwzH+BEgi/GIPo0xOTmLXrl2ewhJHWEV1XVX8P5td54j/NxoNvPTScwA2A7gLwGZcvPjc\nqtq5jENZzNDT61AhyA+AqwD8GYCvA/gagP2KY+IYBfVEu932LJ52/PgJKhTGqVa7PlT4xe+8RL0V\nSksyrKK6FlCi48dPOI7L58csx+XzY6sm1MOhLCYtoI9QT9zC/70Artf+XwXwlwC22I6Jq11C4Se8\n+t9rte1UKNQdghf0vIcOHXaIRK9i0mq1aGxsp2sJCPs1+q0Ievz4CQJKBOzQKpwecdgZxqZhZLXf\nHzM8pFb4HRcDPgvgx22vxdIoYfAT3l6FWe0ll6lYHLd0LCoxqVa30fz8vGu56FarRcvLy4Hsiqrs\ncqvVolptOwEtAtpK0VvtHvFqvz9meBgK4YeshbwCoGp7PZ5WCYGfF9erl6d6HzBDwCmfjuUIASWq\n1fwza/bt2++Z/ROlUAU9V9IZSVGMZsLgd39J28OMJqkXfi3M8xUA71L8jQ4ePNj9WVxcjKWRvEjW\n419LQNvRcehiUq1u08Ipzmv1srFL1KGJoKLuJn7m0UoU4jioTWTc7o83tWHiYnFx0aKVqRZ+AFkA\nXwJwj8vf42mlkPgJmv3vqli913mBDQRMEHDWteNot9s0Pz9PtdqMUqh7EfE4QhO9erR6W5RK20mW\nxV7flzimKezSbrep2Wz6OhA8EmCiIu3C//sAftvj77E0Si/4PZjtdpsOHTpMxeJ4KI9ueXmZ7rzz\n/VQo1AN5ym7i0avQhQ29xCFQ7qOfxZ7FOi0TrXr7ViqbCdiotIdHAkzUpFb4AfwIgNcAPAXgSQBP\nAHib7ZjYGqYfVOLXi/AGyepxe4/X6tmw8fOgYh6XQLnPd7R6Fus0ePxWG9raqM4Zihu0nczqI7XC\nH8iAFAq/m/iF9TD7ESYvoe7HI/c7b1wCFYfHTzS40hY6zu/EWQLKVKns6NqTlpEJs7pg4Y+QKEMt\ncT3w/cbY9Q7t+PETlvPELVBGjF9OYBeLjUjEepCxc9V3olgcp2az2XdyAMN4wcIfEcbk6nZX8Tt+\n/ATl81Uqla525OOrzhdkss8tw0UlaEELpfmHqZwpo0kIVNRZPWkgyKhj0CMTZvXBwh8BxsrcGZLp\nlEcc4nf69FnK5WoElAnYSLlc3fcBdnvg/TJcVAIfRJiDhanaWpjFeZ4oBGoUs1eC3PMotgsTHyz8\nfaKOP5eoWt1mEd1icVw5eRckn7/ZbHaH/37xbrfJwGaz6RmKCR6mahFwned5wgiU+XjOXmGYZGDh\n7xNVbLtWu95SMqHVamnpetbjKpUdvrVx7GJ4yy23kj3tz5zhMj8/rxR4vzzxZrOp2bhMelkFs6AH\nWSQWFvu95XLVVMayg6TqsjfODBMs/H0SJIQSxuP3E0OgSMB4aI/fLRRjrC+Y0DqUEgHrCZigXK5q\niaefO3eOfu3Xfo0OHvxooJBO2CwgGQZrK0cSUX5eYUTaaxSyvLxMd911NxWLEzxKYYYKFv4ICDpB\nJ0W8TMAGyufHAtXGsYshsImAwwSMkVzRWyagQJlMpVv108se+2hCdkhlZUeSzda6C87WrClpnc46\nAoq0d+97PSda3SqS6tdXhZ7k/ZyKzeMPG0paXl6mQqFOwKLFpuXlZW3kVXC0XVpGKQzjBQt/RASd\noDPH6+2oFypZxVB65J/RvP5TWqcgO4hCod5d5GW3x35tQ9T2uoaOjGsvayI3QTJcNUFAno4efUAp\npEYHdkTrRK4joER33PFLnqOZfH6MCoU6VSrTvllPbu1v74iWl5dpfn6ezp075+hUC4VxWl5eVp7v\n9OmzVCiMEzCt3cNZAoiKxfXangEbCKgR4J7FxTBphYU/AYKGF1Qefy5Xt5Rr2LdvvybY0zaxXq+N\nAjZSoTBuqfl/+vRZTaw2ElCmNWtKlMvp5ygSUFd6/MZo45hiVFCmbLZiem2RCoV6V3hlWqt+npbW\nWVnnBvL5MSoWx033dg/J8gXXhQqbGFlOV5PMctpOpdJa2rPn7do1pwkoUD6/1dZmm6hQcGZXuU+g\n2+9h0XFP7PEzwwALf8yowgu6F6ryNu0LlfRUTXO5Bumtj3sKkL7DlVPEVMdWNWGT18zn19k88gXN\nw7WORIrFq7X/n9XeP93tdGTn1NBe30lyhPJ9Du+42WyG2h/AjnF/i2RNM/UXabfVv+qR1ybK5Spa\nCq359QbpYbdicWKoF5QxowMLf48EDe3IGLoRkslkKiYvtET79u13vM8ttmzPuZde+waSYRi7MO+g\nQqGuZeuY0y9b5Bwt7CCgScCp7nvMk8HOTJ5FWrOmZLLRKdgHD35U2Rm53VP/+xa0tA6mrf3/GMn5\nEPN9fh9lszXtdT1806ZKZbp7z26dUKEwrgwX6XMst9xya99iPUzprNxBDTcs/D3gtkDK/iAcOnRY\nE4admtCcIGfIpOTw/FutlsOzLJW2OXarkp3KJ0l65DUywipSjKvVbYo0zkWlIFcq25Rio9/X8eMn\nqFicoFxuHQElKhSupVyuTpmM3olZBXt+fp6KxW024d1MMqy0g+x77oZZ+Wtua6vHXyNjHmKMgLzj\nPj//+c9TPl/V2u2EdvxGyuVqlM+P+W5SY544LxYn6K673u86TxCGYSrNMEwdFKOGhT8kbnF4e7ll\ndZy4Rk7PfBPNz89brrG8vKwUZ7PAyM7hajJCKWXtPZu0f2/vCsfx4ydMo4Myybj+GMlJ3DLNzR3w\nnHTWhfbcuXPaeaz3ns9bX3NLK5W2ynUC1eo2hzcfpnyBua3dspPWrKlo9yrbZN++/bb5Dn2VtXtl\nzKDlMNy+K0G9YreJ/UOHDvu+N0mGqYNi3GHhD4l75s0+y4OgTlds+Ao6EdHCwgLlcm/QvOMZAtZS\nsdjoioguwsa5nMIFlOjo0Qe6ewCUyxsomy1RJlPWvOM2yRBU0ZFt47Zdo/SU7RlAG2hu7j5P77hW\nu94kssG9edXf3ETHGdKSI4+FhYXufIr7pG2T7Ivr+s3OCesVq22boGJxPFWiytVCVwcs/CFxe0Dl\n5KWxibjbStk77nifxTO3x/j37bvH5rnfSvoE5PHjJ7piIjNq9NGDHt+2CnIuVyF7GuaaNUXtvDs0\n0fuIozNSl2ogkuERZ3aPOT7u5h3rtnt5834espfoBPFE1Z22Pr8RvpyGm+3Oyfdg5zNCgzOkz0Gk\nTVTZ418dsPD3gOoBlQLSsjwIbqELt6weqxdveO76AijrA2cWbJXHXybgl01CrXv4JSoW30jAPBnl\nGZwx+larpd2n2cNvmzqSGdJX9/ZSl8dO0MqhXqITZCNzZ6etr1Sukjn8FSbEYh0VjbnOe/gJuCoZ\nII2iytVChx8W/h5QPaD2wmzmY4PEeY8fP6F58fZMlE107NgxlwqZR0iONDYSkCM5hzCjCXOB5KTv\nRjLSLfW5gBzJRVXuHv/CwoKWtWPtUHK5KhWL445FVqr7dLt3e8fnJuiqOYcg4u7V3vb3Hz36gCk7\nSXaOQcIrXhlAsgPpzSseFlHlrJ7hhoW/R+wPqH1jkjAcP36CjFRHp0d67tw5arfb2qTk02RUyNQF\n/TpN0LMEXEmZTEXLwV8kI9PFfE49LCXPPzv7FjKHlzKZshYvLxOwX7uG4QnbH3p9crVS2dztDMxe\ncLE4TocOHdZKHfwcmdNZ77jjfa7zJvr57FtO9is69qygQ4cOhxJb870VCnVFbv8MybIaa0kuEutt\nFXKUk8ejDreVFRb+Pojiy9RutzWPU5+Y1MVcZp3kct/T7VikmE+QXGhVVAj6BMk4fImy2YqWgbOW\nVDn+Mq5NVK1eZ/J4m5q3aj/n5wm4k/J5Z1in3W5r+wwY8wjZbMXkBZ8lPWVS2uwMZelF35zXNdIt\n4/B+VZ1T+NXVi4p7mtA6VmM1c9SY94Cwr9RmrHD6qZNUCz+AtwF4FsBfAbhX8feYmsWfqDwIa3kD\ns5gUSMbhpZgUCuPacW1NoN+jEPQZbTQwQ3rIYv/+exTCVNa8/iOm8xKpJ4nHyGsyutlskmrCt1y+\nlpybtpxS2LyJstkSzc0dIFmuYYd2vgNkr0Lab7xbnf8f7vyq0Umx2KBCQZaeyOfHKJerxhqqcZur\nYPF3wpPRalIr/ADWAPgGgCkAOQBPAdhiOya2hvEiSg/CCOHoBc3k4qZCYZ1FXGq16zXPXD/uWoWg\nm0sXGPX0ZQmFcU10JzQv3Ej5NB4MfZJ4UesE7lJcw5p+KoXfmeIpVyifsnUkbUUnUSI54byNCoUx\n+tEfvZHkyGC7duzZ7vv7yXCxf2aHDh3uKS3RTUjMOf9xhxWkszBja3O5UnvUBc0Op5+qSbPw3wDg\ni6bf5+xef1rSOfvxIGSoxBzCqVEmU1Ze4+jRB2xCfET7fYP27xUWYTfb1Ww2qVDYQOYSz8XitdRq\ntSzzFbL8sjmd1Fpfx77grN1uUyZj3zNggjKZspb3rxZ64/xF098/Q3Kks2g5V78ZLm6fmZygH74J\nWBkedO7JoFoUN+qwx68mzcL/MwBOmH7/dwCO2Y6JqVncidKDaLf1DdpnyKgxIz111YRjs9mkcnmH\nTYjXEfBBAs4RcB8BBSqXr3EIkt9qYH0RlDrdcVH5Hv19Quh16fX01v0kwz07tInmOulF54rFBmUy\nZW09wRvICP2cJRna0evonNDao0GVynTXS+/lgXX7zMJO6to/u0Fu/G4kBOjrMY4MnaAlNeE66I46\njQy98B88eLD7s7i4GE8rmYjKgzBvVGKIq6yzY15A5b1pyhHTw18judLXWZaZSC/xsJ6MDB1jNbD5\nGGd2zWbNC99EQJHe+c6bLfcqQz3rydgf4BwZdYNk+xSL49RsNi0i2Ww2KZ//Xu3cnyHrXID5vkp0\n000/5SiJEdVn1o/4DHrSUK+C2k8a8aBIuu3S3h5xs7i4aNHKNAv/DQC+ZPo9FaEeov49CKcQ3U5e\nFTutx+tZMubyD+paM/YUSHkOawdjj02rPP5yeSOtWVOkXK7ueFCNyd0jmuCXyVr9Uj0iMjxWPUyl\nh5TsE8LOUUc/HW1UXl9aQggqQetVVJMSx7S03SiTZuHPmCZ389rk7lbbMbE1jB/9PCTqxVhhSg20\nqVhcZwr7OLNxzGLrVjZB3/jEXvDMvj7Ba6N2Y45ijJzxfHW9e/cVtHqnZK25IzuSlvLekvrMvD/D\n/uyK0rZeRTVJDzyNE66jNiJIrfBL2/A2AH8J4DkAc4q/x9Qs8WJ9OJ1CZ38IVA9zsTiuyMZxZpro\nRdr0B/ro0QdsWxFaRwCq8Idf5UhZ8bJKqlXHQMGx6Et1vnx+K+XzNSqXr6EwdfwHRZRea5Si24uo\nJu2Bp83jH3TIbhCkWvh9DRhS4ScyvmzOTU7UD4EqVGF+LZerUj4/ZtmiUT5cG8nI9JGxc7nop06Z\nzFVklHJYS7ncG5UCofbQrZUjVYXJgDrlchXH3rzOukPWlEjnyERdGz8p3LzBKMJHcWSJhT3fIDzw\ntEy4pq0TSgoW/gHiFoJxewhUAmRflOReP2acvLcmNMpDqAhSOdKoda/X/c9TsbhV2bF53bM++Wuu\n1TOoobifN9ivXUE23enV5qDfp163veyXNIRX0hh2SgIW/pQQd/xZThybBaZFzoVXGx2bwpjtC1I5\nst1u08LCgmlzFnMoS6as6vnmUU5MxkEU3qDf5+qWZruwsKBMFQ36PfE7zt7Ogx5VDQr2+Fn4U4Pb\nQxvmoVdPnvptPu7cFMaMLhaVyg5PcXCvJKoXlFOXFwjyECbpJao2dwmzK1aQTsyZZjtGQIEKBbky\nu1Ra7wjt9dspurXzoNYkDJq0hJ2ShIU/Zbg93GEfevuX+dChw47wyk/8xNvJqw6PjjkkZa/C6Xa8\nVVj00s+LZJ9INuOs/28ddqvKLsQlUqdPn9VKZDhXEgct2xzEk7Sm2aqK5Mm/WSfz+/NMRzW84UUa\nwk5JwsKfIrw8sV4eer85ASL3TWF0zFUgg26faH5fvS4nkrPZdWSeSLYvHjNCSdbspGJxwmONQdmz\nA+oVmaKqVxzVayI1KMyuWGHE1RhJTTs6Pr3wXqUyrdxaste01lEMbxCNnsC7wcKfItzEYn5+PrCI\nRPnFVovtWjJvMemXJthq2fcH1s/jLPZWqWwmWarBmCDOZCp0+vRZl3mLGQJORS5a6oqjYyR3LAve\n6YYRV31CW93e0Xr8RKMZ3kjT/NGgYeFPEf16/FHXaHffo7YVSnj8MldkOYoJzdsdI0Cv7Nm2tIFz\n17NxApqRFydzqzhaLk+7CobXRLVZXINOupprG6nSd6MQrjR4v0nZMMqjHBUs/CnD7eHet28/mePx\ne/e+1/LAuE3o9iP+budU1YYJex79oWu3zTuLGdcwVxGt12fo3nsPUDZb0wS5TLKq5xh5TRb3ilFO\nwrApl6t300vtYuXlSZqPDepxmtMse83qGQaGYbXwampvMyz8KcT+ZbNOALZInyyt1YzOwa1GezZb\n6etL2+8Wk35rFdy8a7mTmBRduRdx9Kt53eZAZFvrWUjbLB2LXazcFqO5T+Kyx0k0HKuFV3NoiIV/\nCDB26WqRjDOrw0GqGu1Ag5rNZl/X79XrUYmk/TxuO3hlMmUyCrgVyFm/ZyN51e8Jm8euP9TOVFTr\nugO7eBQKdUeHq/Ikk8ikGSbvNO2rhVd7R83CPwQYoYfrSJZevlb5wMzNHSB7jXag7Cv8cQhGmHRG\nYyOaGQImKJeraqmUp0imOF7r6Oy8PH6/Dscv9GT/m15WutlsOsSqWt3m6HDN5/KqfBqlkAybdzoo\nYQ36XV/tKa8s/CnH+YAskqoEwvLyMjWbTS0sUtNCFFJEoxrOhukg3B6cZrOpnASV6wOmu5ueuy8A\nkzX6hSgovTe3eYlabXu3E5Cb32x3faitNZBqlM+PddtHdlL+5SdU7RpXJs2weqdpziwa1jYNCgt/\nimm320qRKhSMzb1LJWO5/djYTsrlqpTL1boiGtVwNqxHqTp3Pj/muqGKeT2B8736xiwNAgqUzVZc\nM2S8M5Hk3IjMh3dfk6CnVi4sLLjeg12svO2PZuMXN4bZO01zeCrNHVO/sPCnFL+FU3rGhyrVUw9N\nqB4m84MWVDB69X7smUgybq8uHW3vVPQN4mu167tetb1wmwp1ltBaAuz7FcuRQLl8taWDNHdwhULd\nkYaqGrXYO8VbbrmVvFYge9luz+YJIoy9fD5xCW6ahbwXVtv96LDwp5AwaZS9rBANm5HiV0pBZX+z\n2dTy7hc1b/uTjnMUi+u1Ym7WDkH3qmu17VQo1EOlalrnDK7XxP4jJOdGnHV3isVNlhz7IGE1dRaQ\n+T1F8tsRze2zKZWuJlmjZ7sjzOSVURXEO7VnWEU9HzBs8wz9MsydAgt/ClGJea12Pc3Pz/ecJuh2\nnF9J6HZbXUrBLyQkwyll0rdflLH6MjlF9RTZdw+T2TynQo0unG2nb15/gmRW0NXknCAeI/MiMevk\nrXx/ofBGS1jN3j7Oz6pFcoRzlozCa2XPwm7WdF33zXXMcxVBF5HZPxdjj+doY9erPSZuxxiRDmcY\niIU/BdgfWFVqptdDFMTb8xoZeAmGUaHSX8jUKaUTXXHN5apdO40witseuyccdgZtS1XKpbTLOkFs\nv4YxStlHclWwnAs4evQB1/bxHiW0CTjlW9TN+GzMW2g6t9PsZdW000b/Hd96YZjnGcKiWuA3bJ0c\nC/+AcauLLkv1lqhUCrZK1m/Y2esCFimEuqfuLmRGNctpm1htoErFKHVgjmMb9ugbyG8gI4MnXG0c\ncxt4ZdnI9Mu6lv3kHP3kcnXNjgnNLvcRlN7ed9zxPk0I9FXF+wN7+9bPxs/jD14nyYy6THbyHv8w\nh0Z02u229j23dp612vVD1cmx8A8Qt1i+kZ++SIVC3bNGfhjsE6bBM34MYXYLBzmFy7ifhYUFn7CQ\n7oHfRzJvv62JaIFyuWqgYXSQxWJm4VGtSHZ+FlJo7RvXm/cxlh1FnuRag6r2E9zbt9tfLDa6Hb6+\nnWatps9VBKuMasctSyrI9yAMXiPP1RL/NxZT2keVwT7ntJBK4QfwWwCeAfAUgE8DqLscF1e7JII6\n9XATea1I7RVzjNdtwtTsjTvTSNtUqUwrF4NZ70MPCW0iYNxRftl8LT1l8lOf+hRlMnrtnZ1aJ1Ml\nt7r9qnP1ktVizhIKUv3TWkxuvDsiMIezpN1l381q3Gxyy+oJuj2nG26lN6LefEXl1a+m+L9xL9aw\nYZS1opIgrcL/VgBrtP9/HMBvuhwXU7Mkg7/HH/8w3CwsxeI4FQrfT0CRikV9E3h/L1Md564T8Bnl\ne06f1vfm1UMjeZLhlTGScfentfcvR177Xr++7n3qC8bU+xSXKZerdUNUzjRRfXJ4xtRZ76B8vuqb\ndhoUs5D2Gyqxvz8pLzwt8f+oQk32sOGwiT5RSoXfchHgZgD/1eVvsTRKktg9sTj2PnV78PT8eWOt\nQEUTYyPDEWkFAAAbUUlEQVS+HbQapxGqkBuXFAoN5XvUnZ3ZY9YnXTcRUPNdeayf0y7K+fxYVyzN\nnr06hLWRSqW1pli9XvJiHxUK9e7IRF1M7rdIrpRe7toflRDEKcxJeuFp8Pijbsthn68YBuH/HIDb\nXf4WS6Mkjf1LFOZLFeRY1YOn2tjDKsAyvu2WRqq6xtzcfZTPV6lS2erqCbVaLcU+tlaPWQqpXAOQ\ny9UDXXvNGn1PYblYbM2aIh0/fsIysshkKjQ3d8CU7unM/qlUtmm2tLWOoUyVynVaVlDBMSKQoxXj\n/F4ZQGGIWyx7zfLqlUGugk1Dx5M2Bib8AB4BcN708zXt33eajvkQgE97nIMOHjzY/VlcXIyxqZLH\n7wEM48XYHzxrPRyVAFvj21426ee2jhbUE9P+Hr+sKAoc7nrVflkxRoXPRdL39AVKitDMBAFFLaPH\nuX7AWnBNnUcvwzsz2t/yZA7L5XJ115IUYYk7POK3riOuUcYgvOS0hJoGyeLiokUrU+vxA3gPgP8F\noOBxTDytlAL8RL3XCU2vapFWAbbuZ6tns9htUp+npgn4NBUKznpB1rRJ3Ws2VxQdN9nhv7m5uqb/\nldo8hb1j20iZTIVyOWeNf7Pwqfa/LZW2aSOaacrn61rKrfn8zoVnvcT69fBU3F5qkMymfq+ZhpBI\nVB5/Gu4lKlIp/ADeBuDrAP6Vz3HxtMqACfJFDVP90g3zg5/Pj1EuV6VKZQcVixN06NBhyySgNZ/f\nKmzWlbLLyuPsE4q12nbK56s0N3cfHTz4Ue3a27QOoaCJ9FoCDlu2VnTLGpHvM66Zy9W0DBx7xzZO\nQIPK5WnKZquUyTjnMLyE16tGkrxvY+cwuYZhcyjP2dzh6+mccYZHzO0ZxjMOIoJpSuHsN9SUpnuJ\ngrQK/3MALgB4Qvt5yOW4uNolMVQPUJAHUK6SrZM51JDJVKhYnPDdAtDt+t6peM7QSKWygxYWFiiX\nq2miulPz9jcobVd1aPa6PEePPqDd1y1k1NeRE6ZuufpyBFHVhHcD5XL17qKtTKZqGllUtRHFBMkJ\nZDmx67Y/sZ9Y2P9uL9lsHkH1mpbqVXAvaoJ6xkFEMI1x9bAeuzm9Nm330i+pFP7ABgy58Ls9QH4P\njTUdskTAFSQLg7lXv3SLPbuJvTWff4bUpRVkOMhadXOR3Jazq3Pl9fCIXPCUzVaUq2fVk9ElqlS2\nkluJhHZbrzO0VxP9a7RjH3Dci1eqqle5BnPGkP55lsvbyVqnKFhMWZbH2GwZNfQai+41LOHX2QUV\n9GGPqxt7RGymfL5Kqiqtw3IvKlj4B0QQcVc9gOqY+pgmNDOWL2eptE3znq0pmvp1vDYL0atEFotm\nYT1LMlRiFWV7iKNYbCgLm6ltL2vet17iwLp2QM8uqlSmFdlAOwh4P7lVDnWWKmiRnDj+dbKPXvwe\nZDeRt3emMpOo6tr5uaGeIO/Ns+w3LOHVacRdyjsKoljvYB3FjpOch2KPn1j4+yPIA6SLzcLCgmWF\nqTod8mrtC+q+GMxcgsAthm2UUlYv8c/nq1QobCB7LNs+qWlfEarbbl+BKjNs7LH4tabzy+witce/\nlmQ4yPr+fH7MdUOUfH5M6wzd5yHs2BecZbMVR8aQfs/G9c6S7JCv7GlDHPvkelDiFtww5x9ECmcU\nsXi3faDz+fqq2ZSFhX9ABHmA7IKTy1VdaspMkLHqdYKADVQojCuyTowUTdX+saXSNiqXp0lVGbJa\n3dbdYcotTu8XC1fV0VHV+jdXoTQLoDExrNeu2afZaa0cCuS6NX5U4mPOUPJ7kN1HWNa2rddnaH5+\nXlm6QpXZZEblBFQqO3z3Sg56rqjDEmEEPclMmDCdkpdd6iyxDbSwsMBZPSz8/eP1ALmlWxaL46aF\nSXLSMpORnUK9PuNbgkAXUfewS5FUxdb8wlBek8deD6PaDn2bxTJlMtaVsOaRg6pyqL7wy5wGGmRi\n2w31COt6si/msnr83u1nJ0ovPakQSxpTG4N2ekFSpd1Wgq8WWPgHjNsD5BbSqVSmu8fbSxG4Layq\n1a6nQmGc5uYOOP4uUx43kFF0TIZ1CgWjSqTbwxHkwQ/yMNo7kkymRDIO712WWffc5aKrTZrYnnW0\nVT+4dcDZbEXZabuVp/bzuqMMiwxylewgCdLphc1c6qXY3jDAwp9SvDx+L7G1C7IsxVxX7twk0yBr\nBKwzCb81rNOvVxf0QdPtnpu7T/Pid3aF3E80Vamtbm3Vi6dqhNzkCEsPI9k7X6s9zo10guwX7JVB\n1Esq4mryUoPg1+lFvVZhWGHhTzFuguN1vD2W7ia6ql2EpNAu9p07bhfE48dPUDZb870PI/1ST+8M\n1tmp2iqbrVgWoanaJ2zJZLvIe51PtTiu1wnH1bZ4KG78OtBBZRulCRb+lGLOpVd5larj7V9ouemK\nM/+42WwqdxECNlEmU7Zs8O0nMvaHzD4hvWZNiYTQY/Y10ssuqx42OdFr9faD1Oox29JsNuneew84\nhDLqBz5oWMHYzrG367JQRc+ohsLMsPCnkF48PNUQVsb26w7RaDabyl2EVGUO7BOxXvXcvTOOzEXY\nmlSpWMswqAQyqLdvxk0oVVlM/WS7uLW3/Xz9ZtmkcSHUagiBrIZ76AcW/hShC2AvHp6b4Kl2bjJi\n4h8h8y5Cc3P32UTG2HXLPFdQLI7T3Nx9ihFGncrlHbZRxAzJiU696ucG0ksnm8swyIlsfbWrsdjq\nzjvf79rxqPCqYRS3x6+qxd+vx542j5/DTqsDFv6UYAjgZrLnEJfL2+nYsWOB4tyqIaxZMI2VudtJ\nZu+8sVs73yoyJ7TQzHrKZvWVqNdp3nuFgKscdlrLGrt5/BPd/6sXZdVJTjRfp9m3jkoluUmKDF2F\nz7vXhdIvfTasB2jMkxiVRb0yRKLaNnFQYpu2TojpHRb+FGB9oNzqwK+nfH4sdMzd/Tq6lz5uqZl/\n+vRZbTWtLvRrNaG3i/myw059hGGeZJXnKZt+N+rXqMswWFcB6xPOYbakDCvwvXqxrZa+8ba+cUvw\nDJEwHY1b9lDSpDHslHbSGlJi4U8BztWrZ01iGWzf2yAELRPh3MDEXm54RhM7fYeqHY4RRrPZpLm5\n+6hYHKdqdRvlchVt5GCcV+3xu13Lugm9Hk93e7DMr0eR5aE6R68ecJiOJk2hFfb4wxFkodigOgUW\n/gFjpDBavedMpkJAlmQNHkOoK5UdPXtYQR5cZyfUJuBKAj5p8/iNcI2XJ6oKM5k9cf/SxmqPXy+l\n7CeKfg9fmAVmfmmbQbOggopnGoU2LWGntOP32Q26Q2fhHzCG8FjrzUjhX6QwS/+D4BcGsXZC+mbk\ncuSRzb5RE+ACARssoaegoQw3z1l/TZ9ELha3kHkFMXA7mSeijx59IFA6Zb/HBD1HUM8tTLgkztBK\nP95mWsMXSePVDl4lttPQobPwDxhnfP8U5fNVrQY+mTqEjd0NRoKeN+wqUGsn5NxxSwrwZ8he997u\nvezbt9+R5uklFLo9ugdfq81QoTDe3bhcf11OHte75/MTxbC1W1Sdoaq0s74jmB7SMldPDfK5DNrj\nH7S3uRoIMgp0K7GdhrkSFv4UYBceZz68euNyv/OFjS1ahWZB8/TN8fZNBMyTeSJTlSbpLAddUpaM\nMNsqOzr3+QzViCIKjz94mxzROmCZbXTHHe/Tyl0YE9d+K6vt9xykyF3UoZUkvc3VOjLwakN1mq9z\n/2r2+Fn4ich9cVTYB77f2OLp02dtgmZ4LFKYryVz6qJqYZR9ItZcZtkvjGKuxR9FYbMohFNd3qJE\nMk3V/Np44AVnqrmPJCYBk/I2V/OowqsNg5bYHvRcCQt/iunlgff6UqrCSnahcvNYZLnmIxbhc+b+\nqzz+RZK5+csEyJr/usiobHXrJPppo36FU6Ztztjs3EjAZoftYSuCJu39JXG9NHi0cRLW4w87ykyC\nVAs/gA8CeB3AWpe/x9MqKafX9ETnRPJOAsqWWjiqSSm5QYt1A/VyeXvXi7F7L3qM35iYNUYJQKkb\nsnLrNKpVdSlo8z0mmdfu3rn17vHrDCLeG7e3mYYYdtx4teGgvfkgpFb4AVwF4EsA/oaF3yDIENpr\nBa8qdVTvGLwmpZx1dJxxS3NntLy8rFgPIFcKN5tNS5hDhpauJKBEuVy1W1VT1cFZQ1HrKZMp09Gj\nDyTW7ubOTaaeho/xmxmUdxynt7naPX6dXhIo0kKahf8PAWxn4TeQYlonc9nisMNI1VaH7pO0zm0P\n7XF/t+urwyPbKZOR5yyXN1A+X6Xbb/95zXvWi8YdsUxw2ytsyg6oTqrJ1rhRTTD7ZfUEEYBh8BDD\nshrvaTWRSuEHcBOA39b+z8JP8kHKZMqah7ldE78ToYfQbt6YapLWPinllZtsv4Zbto8s0ax7yutJ\ntSdApbLFdSMTGXJaT/b1DeYQUloIM8GZdg+xF1bjPa0W+hH+LPpACPEIgCvMLwEgAB8GcB+APba/\nKbn//vu7/5+dncXs7Gw/ZqWSTqeDO+64C6+9lgdwAMARAOsB3IOXXlqDRqMR+DwrKyv49V//ED78\n4R9FLrcOr732HZw8+RBmZmZw6dIKgPMAdgA4j9df/xZmZma675+ZmcHrr3cA/B2ASQDncfnyhe71\nO50OHn74Uzh8+CgKhavx6quXkM//GAqF9bh0aQWzs7NoNpcA5AAsAXgFwHu160H79ypcuvTXKBan\n8corxuu53BQA4NVX/wHAZQBbHO9rtVrYunVr8IaNkU6ng71778bFi4u4eFG25969u/HWt74Fk5OT\njuMnJyeVr/drw8rKChqNRuTnDkIc98T0xtLSEpaWlqI5Wa89htcPgG0A/h7ANyG9/csAVgB8j+LY\n+LrEFNFqtTRPW1VDvx7Io3JW5Wx0yx7Yj+klPdJtfkAv6SDDVFWSVT3NC6KcnrvXqlyZWplVjBTS\n5fEPeoJzNadTMv2DNIZ6LBeR4j/h8rdYGiVtGLHtGtl3zapU5P64XvHl5eVll3z5ReWip7DpkV75\n+Ob8ZlnJskbWyeUjZFQCLdPc3AEi8u6EpPgXtPdtJKBE+/btj6q5IyGptEnVZ+W25y+HXBidYRD+\nb4Jj/LZyyVbRVNWoN3t8hcI4lUrrLR2GXvWyFy/ULjjqfPwZAk4p8puPmGL8G0iWfL6PgE8q1xR4\nZU0sLCzQsWPHUuXpm4lzgtPNoz99+qy2yc70wEYbTPpJvfB7GjBCwk8kxW5u7gAVCuNUqWxzhDu8\nl43byyioPX4/VIITZJm6+b3V6jbK56u0c+cuza7pUF77ME0axmGr22jCGNktOkJo7PEzZlj4h5B2\nu03z8/OOdEmvZeOlkixwVirJDqNYbITONPEKXxg7iO2gYnGim4tvZ3l5mebn5+ncuXM9hUI4dm0e\nYelbVMqQ2vz8vOlz1xfpbaJCYTz2dhqmzphh4R9aelk2vry83I35B80tNwus14Tl6dNnqVgcp0pl\ns8PTV51TdkLbQ4UjRmVhkB/tdltbxDZBcqJ8gnK5qmIuJ1xxv17hznj4YOFPOV6eVFzLxv1DCcFe\n964BtOgaqnJj0JkyaaHddu6Sls+PWUZeUc8ruH0PuTMeTlj4U0wQTyqOZeN+nr1dWHqtjV8sypTS\nOHavWs34tXfUYRev7yF3xsMJC39KGaTI+V07TOzf75xBwk5muBRA8jX1/b4L3BkPHyz8KUWdIrnB\nUkkzTsIKbFK18Yl4IpEouQ4wzJ7Eo9wZDxv9CL+Q7x8cQggatA1x0el0MDW1BRcvLkIvoQDMolgk\n/O3f/lUiS+HDLvkPcvygywisJpJoS9X3sFTajQsXnrVckz/X4UIIASJyLYXj+d5Bi+5qFn4A+NjH\nfgMf+chhAJsBXADwEOr1I3j00Yexa9euAVvHjApnzixg7967kctN4fLlCzh58iHcdtutgzaL6QMW\n/hTT6XSwbt00Xn75E5A16/5O6W0xTNywR7+6YOFPOextMQwTNSz8QwB7WwzDRAkLP8MwzIjRj/Cv\nidoYhmEYJt2w8CdMp9PB448/jk6nM2hTGIYZUVj4E+TMmQVMTW3Bnj13YWpqC86cWej7nNyRMAwT\nFo7xJ0TQRTRh0LOF8vkGLl1a4WwhhhkhOMY/BKysrCCfb8C8uXguN4WVlZWezmfeCPyFF76KixcX\nsXfv3ez5MwzjCwt/QjQa0iuXZRsA4DwuX76ARqPR0/mi7kgYhhkdWPgTYnJyEidPPoRSaTfq9Z0o\nlXbj5MmHeg7zRN2RMAwzOsQa4xdC/DKAuwG8CuBPiGhOccxIxPh1olzIxSuCGWZ0SeUCLiHELID7\nALyDiF4VQryBiP5RcdxICX/U8IpghhlN0ir8CwAeJqI/8zmOhZ9hGCYkac3qmQbwY0KILwshFoUQ\nPxDjtRiGYZiAZPt5sxDiEQBXmF8CQAA+rJ17gohuEELsAvAHAK5Wnef+++/v/n92dhazs7P9mMUw\nDLPqWFpawtLSUiTnijPU8wUAR4joMe33bwD4ISL6ru04DvUwDMOEJK2hns8CeAsACCGmAeTsos8w\nDMMkT1+hHh9+F8B/EUJ8DcArAH4hxmsxDMMwAeFaPQzDMENIWkM9DMMwTAph4WcYhhkxWPgZhmFG\nDBZ+hmGYEYOFn2EYZsRg4WcYhhkxWPgZhmFGDBZ+hmGYEYOFn2EYZsRg4WcYhhkxWPgZhmFGDBZ+\nhmGYEYOFPySdTgePP/44Op3OoE1hGIbpCRb+EJw5s4CpqS3Ys+cuTE1twZkzC4M2iWEYJjRcljkg\nnU4HU1NbcPHiIoAdAM6jVNqNCxeexeTk5KDNYxhmxOCyzAmwsrKCfL4BKfoAsAO53BRWVlYGZxTD\nMEwPsPAHpNFo4NKlFQDntVfO4/LlC2g0GoMzimEYpgdY+AMyOTmJkycfQqm0G/X6TpRKu3Hy5EMc\n5mEYZuiILcYvhLgOwHEARQCXAdxNRF9RHDcUMX6dTqeDlZUVNBoNFn2GYQZGPzH+OIW/CeABIvpT\nIcTbAfwqEe1WHDdUws8wDJMG0jq5+zqAMe3/4wC+HeO1GIZhmIDE6fFvAdAEILSfHyai5xXHscfP\nMAwTkoGFeoQQjwC4wvwSAALwIQBvBbBIRJ8VQrwbwJ1EtEdxDhZ+hmGYkKQ1xv9PRDRu+v0FIhpT\nHEcHDx7s/j47O4vZ2dlYbGIYhhlWlpaWsLS01P39ox/9aCqF/+uQmTyPCSF+HMDHiWiX4jj2+BmG\nYULSj8efjdoYE+8DcEwIkQHwMoBfivFaDMMwTEC4Vg/DMMwQktZ0ToZhGCaFsPAzDMOMGCz8DMMw\nIwYLP8MwzIjBws8wDDNisPAzDMOMGCz8DMMwIwYLP8MwzIjBws8wDDNisPAzDMOMGCz8DMMwIwYL\nP8MwzIjBws8wDDNisPAzDMOMGCz8DMMwIwYLP8MwzIjBws8wDDNisPAzDMOMGCz8DMMwI0Zfwi+E\neLcQ4i+EEK8JIXba/nZACPGcEOIZIcRP9GcmwzAMExX9evxfA/BTAB4zvyiE2ArgZwFsBfB2AA8J\nIXraFDgtLC0tDdqEQLCd0cJ2Rscw2AgMj5390JfwE9FfEtFzAOyi/i4AZ4noVSJaAfAcgB/s51qD\nZli+DGxntLCd0TEMNgLDY2c/xBXjvxLA86bfv629xjAMwwyYrN8BQohHAFxhfgkAAfgQEX0+LsMY\nhmGYeBBE1P9JhFgE8EEiekL7fQ4AEdER7fcvAThIRP9H8d7+DWAYhhlBiKinuVNfjz8EZgM+B+CU\nEOJByBDPRgAt1Zt6NZxhGIbpjX7TOW8WQjwP4AYAfyyE+CIAENEygD8AsAzgCwDupiiGFgzDMEzf\nRBLqYRiGYYaHxFbuCiHeJoR4VgjxV0KIe12OOaYt+npKCHF9UrbZbPC0UwixWQjxv4UQLwshfmUQ\nNmp2+Nl5uxDiae3nnBBie0rtvEmz8UkhREsI8SNps9F03C4hxGUhxE8naZ/p+n5teaMQ4p+EEE9o\nPx9Oo53aMbPaZ/4X2hxh4gRoz3+v2fiEEOJrQohXhRDjKbSzLoT4nKabXxNCvMf3pEQU+w9kB/MN\nAFMAcgCeArDFdszbAfyJ9v8fAvDlJGzrwc43AHgTgEMAfiVpG0PYeQOAMe3/b0txe5ZN/98O4Jm0\n2Wg67n8A+GMAP53StrwRwOcG8Z0MaecYgK8DuFL7/Q1ptNN2/L8F8Gga7QRwAMBv6m0J4LsAsl7n\nTcrj/0EAzxHRBSK6DOAs5CIvM+8C8PsAQDL7Z0wIcQWSxddOIvpHIvoqgFcTts1MEDu/TEQvaL9+\nGYNZRxHEzpdMv1YBvJ6gfUCw7yYA/DKAPwLQTtI4E0HtHHSyRBA7bwfwaSL6NiCfqYRtBIK3p85t\nAM4kYpmVIHYSgJr2/xqA7xKRpz4lJfz2BV3fglOI0rDoK4idaSCsne8F8MVYLVITyE4tSeAZAJ8H\ncEdCtun42iiE+H4ANxPRJzE4YQ36mf9rbcj/J0KIa5IxzUIQO6cBrBVCLAohHhdC/Hxi1hkEfoaE\nECXIUfOnE7DLThA7/zOAa4QQ3wHwNIB7/E4aZTonk0KEELsB/CKANw/aFjeI6LMAPiuEeDOAjwHY\nM2CT7PxHAObY6qC9aje+CmAdEb0khHg7gM9CimzayALYCeAtACoA/lwI8edE9I3BmuXKOwGcI6J/\nGrQhLvwkgCeJ6C1CiA0AHhFC7CCiF93ekJTH/20A60y/X6W9Zj/mjT7HxE0QO9NAIDuFEDsAnABw\nExH9v4RsMxOqPYnoHICrhRBr4zbMRBAbfwDAWSHE3wB4N4BPCCFuSsg+HV87iehFPXRGRF8EkEu4\nLYFg7fktAE0iepmIvgvgfwK4LiH7dMJ8N38OgwnzAMHs/EUA/w0AiOivAfwNgC2eZ01ogiIDY4Ii\nDzlBsdV2zDtgTO7egMFMRvraaTr2IORq5URtDNGe6yCL490wCBtD2LnB9P+dAJ5Pm422438Xg5nc\nDdKWV5j+/4MAVlJq5xYAj2jHliGr/F6TNju148YgJ0tLSbdliPb8BGRlBECW13kewFqv8yYS6iGi\n14QQ+wD8KeQo4yQRPSOEuFP+mU4Q0ReEEO8QQnwDwL9A9mKJEsRObcL5K5CTKK8LIe6B/NK6DqsG\nYSeAjwBYC6Mk9mUiSrRCakA7f0YI8QsALgG4CFnOO202Wt6SpH3diwaz891CiPcDuAzZlrem0U4i\nelYI0QRwHsBrAE6QXPSZKju1Q2+GHJ1cTNK+kHZ+DMC8EOK89rZfJaL/63VeXsDFMAwzYvDWiwzD\nMCMGCz/DMMyIwcLPMAwzYrDwMwzDjBgs/AzDMCMGCz/DMMyIwcLPMAwzYrDwMwzDjBj/H23Vy3/n\n4N6MAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x124eef0b8>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "plt.scatter(tfidfvals, socialvals)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Interpretation\n",
    "\n",
    "The correlation is significantly worse than for topic vectors, and certainly not competitive with predictive models.\n",
    "\n",
    "One underlying problem (for both tfidf and topics) seems to be that raw distances interpret many genres as being relatively close, and only a few as being remote from each other. But predictive distances, and social distances, are going to be structured differently: many genres are relatively remote, and only a few have significant proximity.\n",
    "\n",
    "#### calculate uncertainty "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(-0.25170840418984103,\n",
       " 1.3202157996454919e-08,\n",
       " -0.33238605541439592,\n",
       " -0.16737385845789604)"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "## Careful reading about confidence intervals has led me to conclude that bootstrapped\n",
    "## results are not always reliable in this case. \n",
    "\n",
    "## I am relying instead on a function written by Zhiya Zuo, in this post:\n",
    "## https://zhiyzuo.github.io/Pearson-Correlation-CI-in-Python/\n",
    "\n",
    "# It matches results from cor.test in R.\n",
    "\n",
    "from scipy import stats\n",
    "\n",
    "def pearsonr_ci(x,y,alpha=0.05):\n",
    "    ''' calculate Pearson correlation along with the confidence interval using scipy and numpy\n",
    "    Parameters\n",
    "    ----------\n",
    "    x, y : iterable object such as a list or np.array\n",
    "      Input for correlation calculation\n",
    "    alpha : float\n",
    "      Significance level. 0.05 by default\n",
    "    Returns\n",
    "    -------\n",
    "    r : float\n",
    "      Pearson's correlation coefficient\n",
    "    pval : float\n",
    "      The corresponding p value\n",
    "    lo, hi : float\n",
    "      The lower and upper bound of confidence intervals\n",
    "    '''\n",
    "\n",
    "    r, p = stats.pearsonr(x,y)\n",
    "    r_z = np.arctanh(r)\n",
    "    se = 1/np.sqrt(x.size-3)\n",
    "    z = stats.norm.ppf(1-alpha/2)\n",
    "    lo_z, hi_z = r_z-z*se, r_z+z*se\n",
    "    lo, hi = np.tanh((lo_z, hi_z))\n",
    "    return r, p, lo, hi\n",
    "\n",
    "pearsonr_ci(np.array(tfidfvals), np.array(socialvals))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
